diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index 6e8050a499635..651a54db4c203 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -47,6 +47,7 @@ let inherit (lib) cmakeBool cmakeFeature + optionalAttrs optionals strings ; @@ -197,7 +198,7 @@ effectiveStdenv.mkDerivation (finalAttrs: { ]; # Environment variables needed for ROCm - env = optionals useRocm { + env = optionalAttrs useRocm { ROCM_PATH = "${rocmPackages.clr}"; HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode"; }; diff --git a/.devops/tools.sh b/.devops/tools.sh index 41a6b1e55c7d2..8a3a69340059c 100755 --- a/.devops/tools.sh +++ b/.devops/tools.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e # Read the first argument into a variable diff --git a/.github/ISSUE_TEMPLATE/010-bug-compilation.yml b/.github/ISSUE_TEMPLATE/010-bug-compilation.yml index b85bf5741e5a3..95a0b5cc75bde 100644 --- a/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +++ b/.github/ISSUE_TEMPLATE/010-bug-compilation.yml @@ -40,7 +40,7 @@ body: attributes: label: GGML backends description: Which GGML backends do you know to be affected? - options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan] + options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL] multiple: true validations: required: true diff --git a/.github/ISSUE_TEMPLATE/011-bug-results.yml b/.github/ISSUE_TEMPLATE/011-bug-results.yml index 1ccef0793d45e..d1034bbb6910e 100644 --- a/.github/ISSUE_TEMPLATE/011-bug-results.yml +++ b/.github/ISSUE_TEMPLATE/011-bug-results.yml @@ -42,7 +42,7 @@ body: attributes: label: GGML backends description: Which GGML backends do you know to be affected? 
- options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan] + options: [AMX, BLAS, CPU, CUDA, HIP, Metal, Musa, RPC, SYCL, Vulkan, OpenCL] multiple: true validations: required: true diff --git a/.github/labeler.yml b/.github/labeler.yml index 3c2f67707b024..df6a7a40ed910 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -1,10 +1,4 @@ # https://github.com/actions/labeler -Kompute: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-kompute.h - - ggml/src/ggml-kompute/** - - README-kompute.md Apple Metal: - changed-files: - any-glob-to-any-file: @@ -93,3 +87,8 @@ Ascend NPU: - ggml/include/ggml-cann.h - ggml/src/ggml-cann/** - docs/backend/CANN.md +OpenCL: + - changed-files: + - any-glob-to-any-file: + - ggml/include/ggml-opencl.h + - ggml/src/ggml-opencl/** diff --git a/.github/workflows/build-cmake-pkg.yml b/.github/workflows/build-cmake-pkg.yml new file mode 100644 index 0000000000000..fee2ab96bd0e8 --- /dev/null +++ b/.github/workflows/build-cmake-pkg.yml @@ -0,0 +1,51 @@ +name: Build relocatable cmake package +on: + workflow_dispatch: + workflow_call: + +jobs: + linux: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y build-essential tcl + + - name: Build + run: | + PREFIX="$(pwd)"/inst + cmake -S . 
-B build -DCMAKE_PREFIX_PATH="$PREFIX" \ + -DLLAMA_CURL=OFF -DLLAMA_BUILD_TESTS=OFF -DLLAMA_BUILD_TOOLS=OFF \ + -DLLAMA_BUILD_EXAMPLES=OFF -DCMAKE_BUILD_TYPE=Release + cmake --build build --config Release + cmake --install build --prefix "$PREFIX" --config Release + + export LLAMA_CONFIG="$PREFIX"/lib/cmake/llama/llama-config.cmake + tclsh <<'EOF' + set build(commit) [string trim [exec git rev-parse --short HEAD]] + set build(number) [string trim [exec git rev-list --count HEAD]] + set build(version) "0.0.$build(number)" + + set llamaconfig [read [open "$env(LLAMA_CONFIG)" r]] + set checks [list "set\\(LLAMA_VERSION \\s+$build(version)\\)" \ + "set\\(LLAMA_BUILD_COMMIT\\s+$build(commit)\\)" \ + "set\\(LLAMA_BUILD_NUMBER\\s+$build(number)\\)"] + + puts -nonewline "Checking llama-config.cmake version... " + foreach check $checks { + if {![regexp -expanded -- $check $llamaconfig]} { + puts "\"$check\" failed!" + exit 1 + } + } + puts "success." + EOF + + cd examples/simple-cmake-pkg + cmake -S . 
-B build -DCMAKE_PREFIX_PATH="$PREFIX"/lib/cmake + cmake --build build diff --git a/.github/workflows/build-linux-cross.yml b/.github/workflows/build-linux-cross.yml index 7cfc82ba4e277..04ad187d35c09 100644 --- a/.github/workflows/build-linux-cross.yml +++ b/.github/workflows/build-linux-cross.yml @@ -48,98 +48,98 @@ jobs: cmake --build build --config Release -j $(nproc) - ubuntu-24-riscv64-vulkan-cross: - runs-on: ubuntu-24.04 - - steps: - - uses: actions/checkout@v4 - - name: Setup Riscv - run: | - sudo dpkg --add-architecture riscv64 - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - EOF - - sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - sudo apt-get install -y --no-install-recommends \ - build-essential \ - glslc \ - gcc-14-riscv64-linux-gnu \ - g++-14-riscv64-linux-gnu \ - libvulkan-dev:riscv64 - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_VULKAN=ON \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ - -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) - - ubuntu-24-arm64-vulkan-cross: - runs-on: ubuntu-24.04 - - steps: - - uses: actions/checkout@v4 - - name: Setup Arm64 - run: | - sudo dpkg --add-architecture arm64 - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list - deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - EOF - - sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - sudo apt-get install -y --no-install-recommends \ - build-essential \ - glslc \ - crossbuild-essential-arm64 \ - libvulkan-dev:arm64 - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_VULKAN=ON \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ - -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \ - -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) + # ubuntu-24-riscv64-vulkan-cross: + # runs-on: ubuntu-24.04 + + # steps: + # - uses: actions/checkout@v4 + # - name: Setup Riscv + # run: | + # sudo dpkg --add-architecture riscv64 + + # # Add arch-specific repositories for non-amd64 architectures + # cat << EOF | sudo tee /etc/apt/sources.list.d/riscv64-ports.list + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe + # deb [arch=riscv64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe + # EOF + + # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
+ + # sudo apt-get install -y --no-install-recommends \ + # build-essential \ + # glslc \ + # gcc-14-riscv64-linux-gnu \ + # g++-14-riscv64-linux-gnu \ + # libvulkan-dev:riscv64 + + # - name: Build + # run: | + # cmake -B build -DLLAMA_CURL=OFF \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DGGML_VULKAN=ON \ + # -DGGML_OPENMP=OFF \ + # -DLLAMA_BUILD_EXAMPLES=ON \ + # -DLLAMA_BUILD_TOOLS=ON \ + # -DLLAMA_BUILD_TESTS=OFF \ + # -DCMAKE_SYSTEM_NAME=Linux \ + # -DCMAKE_SYSTEM_PROCESSOR=riscv64 \ + # -DCMAKE_C_COMPILER=riscv64-linux-gnu-gcc-14 \ + # -DCMAKE_CXX_COMPILER=riscv64-linux-gnu-g++-14 \ + # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + # -DCMAKE_FIND_ROOT_PATH=/usr/lib/riscv64-linux-gnu \ + # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + # cmake --build build --config Release -j $(nproc) + + # ubuntu-24-arm64-vulkan-cross: + # runs-on: ubuntu-24.04 + + # steps: + # - uses: actions/checkout@v4 + # - name: Setup Arm64 + # run: | + # sudo dpkg --add-architecture arm64 + + # # Add arch-specific repositories for non-amd64 architectures + # cat << EOF | sudo tee /etc/apt/sources.list.d/arm64-ports.list + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble main universe + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe + # deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe + # EOF + + # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
+ + # sudo apt-get install -y --no-install-recommends \ + # build-essential \ + # glslc \ + # crossbuild-essential-arm64 \ + # libvulkan-dev:arm64 + + # - name: Build + # run: | + # cmake -B build -DLLAMA_CURL=OFF \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DGGML_VULKAN=ON \ + # -DGGML_OPENMP=OFF \ + # -DLLAMA_BUILD_EXAMPLES=ON \ + # -DLLAMA_BUILD_TOOLS=ON \ + # -DLLAMA_BUILD_TESTS=OFF \ + # -DCMAKE_SYSTEM_NAME=Linux \ + # -DCMAKE_SYSTEM_PROCESSOR=aarch64 \ + # -DCMAKE_C_COMPILER=aarch64-linux-gnu-gcc \ + # -DCMAKE_CXX_COMPILER=aarch64-linux-gnu-g++ \ + # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + # -DCMAKE_FIND_ROOT_PATH=/usr/lib/aarch64-linux-gnu \ + # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + # cmake --build build --config Release -j $(nproc) ubuntu-24-ppc64el-cpu-cross: runs-on: ubuntu-24.04 @@ -185,52 +185,52 @@ jobs: cmake --build build --config Release -j $(nproc) - ubuntu-24-ppc64el-vulkan-cross: - runs-on: ubuntu-24.04 - - steps: - - uses: actions/checkout@v4 - - name: Setup PowerPC64le - run: | - sudo dpkg --add-architecture ppc64el - - # Add arch-specific repositories for non-amd64 architectures - cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe - deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe - EOF - - sudo apt-get update || true ;# Prevent failure due to missing URLs. 
- - sudo apt-get install -y --no-install-recommends \ - build-essential \ - glslc \ - gcc-14-powerpc64le-linux-gnu \ - g++-14-powerpc64le-linux-gnu \ - libvulkan-dev:ppc64el - - - name: Build - run: | - cmake -B build -DLLAMA_CURL=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DGGML_VULKAN=ON \ - -DGGML_OPENMP=OFF \ - -DLLAMA_BUILD_EXAMPLES=ON \ - -DLLAMA_BUILD_TOOLS=ON \ - -DLLAMA_BUILD_TESTS=OFF \ - -DCMAKE_SYSTEM_NAME=Linux \ - -DCMAKE_SYSTEM_PROCESSOR=ppc64 \ - -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \ - -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \ - -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ - -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \ - -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ - -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ - -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH - - cmake --build build --config Release -j $(nproc) + # ubuntu-24-ppc64el-vulkan-cross: + # runs-on: ubuntu-24.04 + + # steps: + # - uses: actions/checkout@v4 + # - name: Setup PowerPC64le + # run: | + # sudo dpkg --add-architecture ppc64el + + # # Add arch-specific repositories for non-amd64 architectures + # cat << EOF | sudo tee /etc/apt/sources.list.d/ppc64el-ports.list + # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble main universe + # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-updates main universe + # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-security main universe + # deb [arch=ppc64el] http://ports.ubuntu.com/ubuntu-ports/ noble-backports main universe + # EOF + + # sudo apt-get update || true ;# Prevent failure due to missing URLs. 
+ + # sudo apt-get install -y --no-install-recommends \ + # build-essential \ + # glslc \ + # gcc-14-powerpc64le-linux-gnu \ + # g++-14-powerpc64le-linux-gnu \ + # libvulkan-dev:ppc64el + + # - name: Build + # run: | + # cmake -B build -DLLAMA_CURL=OFF \ + # -DCMAKE_BUILD_TYPE=Release \ + # -DGGML_VULKAN=ON \ + # -DGGML_OPENMP=OFF \ + # -DLLAMA_BUILD_EXAMPLES=ON \ + # -DLLAMA_BUILD_TOOLS=ON \ + # -DLLAMA_BUILD_TESTS=OFF \ + # -DCMAKE_SYSTEM_NAME=Linux \ + # -DCMAKE_SYSTEM_PROCESSOR=ppc64 \ + # -DCMAKE_C_COMPILER=powerpc64le-linux-gnu-gcc-14 \ + # -DCMAKE_CXX_COMPILER=powerpc64le-linux-gnu-g++-14 \ + # -DCMAKE_POSITION_INDEPENDENT_CODE=ON \ + # -DCMAKE_FIND_ROOT_PATH=/usr/lib/powerpc64le-linux-gnu \ + # -DCMAKE_FIND_ROOT_PATH_MODE_PROGRAM=NEVER \ + # -DCMAKE_FIND_ROOT_PATH_MODE_LIBRARY=ONLY \ + # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH + + # cmake --build build --config Release -j $(nproc) debian-13-loongarch64-cpu-cross: runs-on: ubuntu-24.04 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 5422dd81723f9..5bd988b7f7ce3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -5,10 +5,43 @@ on: push: branches: - master - paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + paths: [ + '.github/workflows/build.yml', + '.github/workflows/build-linux-cross.yml', + '.github/workflows/build-cmake-pkg.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.cuh', + '**/*.swift', + '**/*.m', + '**/*.metal', + '**/*.comp' + ] + pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/build.yml', '.github/workflows/build-linux-cross.yml', '**/CMakeLists.txt', '**/.cmake', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', 
'**/*.metal', '**/*.comp'] + paths: [ + '.github/workflows/build.yml', + '.github/workflows/build-linux-cross.yml', + '.github/workflows/build-cmake-pkg.yml', + '**/CMakeLists.txt', + '**/.cmake', + '**/*.h', + '**/*.hpp', + '**/*.c', + '**/*.cpp', + '**/*.cu', + '**/*.cuh', + '**/*.swift', + '**/*.m', + '**/*.metal', + '**/*.comp' + ] concurrency: group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} @@ -51,7 +84,8 @@ jobs: -DCMAKE_BUILD_RPATH="@loader_path" \ -DLLAMA_FATAL_WARNINGS=ON \ -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ + -DGGML_METAL_EMBED_LIBRARY=OFF \ + -DGGML_METAL_SHADER_DEBUG=ON \ -DGGML_RPC=ON cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) @@ -101,6 +135,69 @@ jobs: cd build ctest -L main --verbose --timeout 900 + macOS-latest-cmake-arm64-webgpu: + runs-on: macos-14 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: macOS-latest-cmake-arm64-webgpu + evict-old-files: 1d + + - name: Dependencies + id: depends + continue-on-error: true + run: | + brew update + brew install curl + + - name: Dawn Dependency + id: dawn-depends + run: | + ARTIFACTS_JSON=$(curl -s -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/google/dawn/actions/artifacts") + echo "Finding latest macos-latest-Release artifact..." + DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts + | sort_by(.created_at) + | reverse + | map(select(.name | test("macos-latest-Release$"))) + | .[0].archive_download_url') + if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then + echo "No suitable Dawn artifact found!" 
+ exit 1 + fi + echo "Downloading from: $DOWNLOAD_URL" + curl -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -o artifact.zip "$DOWNLOAD_URL" + unzip artifact.zip + mkdir dawn + tar_file=$(find . -name '*.tar.gz' | head -n 1) + echo "Extracting: $tar_file" + tar -xvf "$tar_file" -C dawn --strip-components=1 + + - name: Build + id: cmake_build + run: | + export CMAKE_PREFIX_PATH=dawn + cmake -B build -DGGML_WEBGPU=ON -DGGML_METAL=OFF -DGGML_BLAS=OFF + cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) + + - name: Test + id: cmake_test + run: | + cd build + ctest -L main --verbose --timeout 900 + ubuntu-cpu-cmake: strategy: matrix: @@ -308,6 +405,72 @@ jobs: cd build export GGML_VK_VISIBLE_DEVICES=0 # This is using llvmpipe and runs slower than other backends + ctest -L main --verbose --timeout 4200 + + ubuntu-22-cmake-webgpu: + runs-on: ubuntu-22.04 + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v4 + + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2.16 + with: + key: ubuntu-22-cmake-webgpu + evict-old-files: 1d + + - name: Vulkan SDK Dependencies + id: vulkan-depends + run: | + wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - + sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list + sudo apt-get update -y + sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libcurl4-openssl-dev + + - name: Dawn Dependency + id: dawn-depends + run: | + sudo apt-get install -y libxrandr-dev libxinerama-dev libxcursor-dev mesa-common-dev libx11-xcb-dev libxi-dev + ARTIFACTS_JSON=$(curl -s -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "https://api.github.com/repos/google/dawn/actions/artifacts") + echo "Finding latest ubuntu-latest-Release artifact..." 
+ DOWNLOAD_URL=$(echo "$ARTIFACTS_JSON" | jq -r '.artifacts + | sort_by(.created_at) + | reverse + | map(select(.name | test("ubuntu-latest-Release$"))) + | .[0].archive_download_url') + if [ "$DOWNLOAD_URL" = "null" ] || [ -z "$DOWNLOAD_URL" ]; then + echo "No suitable Dawn artifact found!" + exit 1 + fi + echo "Downloading from: $DOWNLOAD_URL" + curl -L \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${{ secrets.GITHUB_TOKEN }}" \ + -o artifact.zip "$DOWNLOAD_URL" + unzip artifact.zip + mkdir dawn + tar_file=$(find . -name '*.tar.gz' | head -n 1) + echo "Extracting: $tar_file" + tar -xvf "$tar_file" -C dawn --strip-components=1 + + - name: Build + id: cmake_build + run: | + export Dawn_DIR=dawn/lib64/cmake/Dawn + cmake -B build -DGGML_WEBGPU=ON + cmake --build build --config Release -j $(nproc) + + - name: Test + id: cmake_test + run: | + cd build + # This is using llvmpipe and runs slower than other backends ctest -L main --verbose --timeout 3600 ubuntu-22-cmake-hip: @@ -478,6 +641,9 @@ jobs: build-linux-cross: uses: ./.github/workflows/build-linux-cross.yml + build-cmake-pkg: + uses: ./.github/workflows/build-cmake-pkg.yml + macOS-latest-cmake-ios: runs-on: macos-latest @@ -628,7 +794,7 @@ jobs: ./build-xcframework.sh windows-msys2: - runs-on: windows-latest + runs-on: windows-2025 strategy: fail-fast: false @@ -678,28 +844,31 @@ jobs: cmake --build build --config ${{ matrix.build }} -j $(nproc) windows-latest-cmake: - runs-on: windows-latest + runs-on: windows-2025 env: OPENBLAS_VERSION: 0.3.23 SDE_VERSION: 9.33.0-2024-01-07 - VULKAN_VERSION: 1.4.309.0 + VULKAN_VERSION: 1.4.313.2 strategy: matrix: include: - build: 'cpu-x64 (static)' + arch: 'x64' defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF' - build: 'openblas-x64' + arch: 'x64' defines: '-G "Ninja Multi-Config" -D 
CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' - build: 'vulkan-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON' + arch: 'x64' + defines: '-DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_VULKAN=ON' - build: 'llvm-arm64' + arch: 'arm64' defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' - build: 'llvm-arm64-opencl-adreno' + arch: 'arm64' defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' - # - build: 'kompute-x64' - # defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/x64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_OPENMP=OFF -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON' steps: - name: Clone @@ -713,12 +882,6 @@ jobs: variant: ccache evict-old-files: 1d - - name: Clone Kompute submodule - id: clone_kompute - if: ${{ matrix.build == 'kompute-x64' }} - run: | - git submodule update --init ggml/src/ggml-kompute/kompute - - name: Download OpenBLAS id: get_openblas if: ${{ matrix.build == 'openblas-x64' }} @@ -734,9 +897,9 @@ jobs: - name: Install Vulkan SDK id: get_vulkan - if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }} + if: ${{ matrix.build == 'vulkan-x64' }} run: | - curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L 
"https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" @@ -769,6 +932,8 @@ jobs: - name: libCURL id: get_libcurl uses: ./.github/actions/windows-setup-curl + with: + architecture: ${{ matrix.arch == 'x64' && 'win64' || 'win64a' }} - name: Build id: cmake_build @@ -778,6 +943,7 @@ jobs: cmake -S . -B build ${{ matrix.defines }} ` -DCURL_LIBRARY="$env:CURL_PATH/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:CURL_PATH/include" cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} + cp $env:CURL_PATH/bin/libcurl-*.dll build/bin/Release - name: Add libopenblas.dll id: add_libopenblas_dll @@ -788,7 +954,7 @@ jobs: - name: Test id: cmake_test - if: ${{ matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' }} + if: ${{ matrix.arch == 'x64' }} run: | cd build ctest -L main -C Release --verbose --timeout 900 @@ -893,7 +1059,7 @@ jobs: cmake --build build --config Release windows-latest-cmake-sycl: - runs-on: windows-latest + runs-on: windows-2022 defaults: run: @@ -927,7 +1093,7 @@ jobs: windows-latest-cmake-hip: if: ${{ github.event.inputs.create_release != 'true' }} - runs-on: windows-latest + runs-on: windows-2022 steps: - name: Clone diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9874736cbd8de..4ed6126f487c0 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -49,7 +49,8 @@ jobs: run: | sysctl -a cmake -B build \ - -DCMAKE_BUILD_RPATH="@loader_path" \ + -DCMAKE_INSTALL_RPATH='@loader_path' \ + 
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ -DLLAMA_FATAL_WARNINGS=ON \ -DGGML_METAL_USE_BF16=ON \ -DGGML_METAL_EMBED_LIBRARY=ON \ @@ -103,7 +104,8 @@ jobs: # Metal is disabled due to intermittent failures with Github runners not having a GPU: # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 cmake -B build \ - -DCMAKE_BUILD_RPATH="@loader_path" \ + -DCMAKE_INSTALL_RPATH='@loader_path' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ -DLLAMA_FATAL_WARNINGS=ON \ -DGGML_METAL=OFF \ -DGGML_RPC=ON @@ -160,6 +162,8 @@ jobs: id: cmake_build run: | cmake -B build \ + -DCMAKE_INSTALL_RPATH='$ORIGIN' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ -DGGML_BACKEND_DL=ON \ -DGGML_NATIVE=OFF \ -DGGML_CPU_ALL_VARIANTS=ON \ @@ -211,6 +215,8 @@ jobs: id: cmake_build run: | cmake -B build \ + -DCMAKE_INSTALL_RPATH='$ORIGIN' \ + -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \ -DGGML_BACKEND_DL=ON \ -DGGML_NATIVE=OFF \ -DGGML_CPU_ALL_VARIANTS=ON \ @@ -235,7 +241,7 @@ jobs: name: llama-bin-ubuntu-vulkan-x64.zip windows-cpu: - runs-on: windows-latest + runs-on: windows-2025 strategy: matrix: @@ -271,7 +277,7 @@ jobs: env: CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | - call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch }} + call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }} cmake -S . 
-B build -G "Ninja Multi-Config" ^ -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^ -DGGML_NATIVE=OFF ^ @@ -288,7 +294,7 @@ jobs: CURL_PATH: ${{ steps.get_libcurl.outputs.curl_path }} run: | Copy-Item $env:CURL_PATH\bin\libcurl-${{ matrix.arch }}.dll .\build\bin\Release\ - Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.42.34433\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\ + Copy-Item "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Redist\MSVC\14.44.35112\debug_nonredist\${{ matrix.arch }}\Microsoft.VC143.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\ 7z a llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\* - name: Upload artifacts @@ -298,11 +304,11 @@ jobs: name: llama-bin-win-cpu-${{ matrix.arch }}.zip windows: - runs-on: windows-latest + runs-on: windows-2025 env: OPENBLAS_VERSION: 0.3.23 - VULKAN_VERSION: 1.4.309.0 + VULKAN_VERSION: 1.4.313.2 strategy: matrix: @@ -332,7 +338,7 @@ jobs: id: get_vulkan if: ${{ matrix.backend == 'vulkan' }} run: | - curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" + curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe" & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" @@ -448,7 +454,7 @@ jobs: name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip windows-sycl: - runs-on: windows-latest + runs-on: windows-2022 defaults: run: @@ -520,7 +526,7 @@ jobs: 
name: llama-bin-win-sycl-x64.zip windows-hip: - runs-on: windows-latest + runs-on: windows-2022 strategy: matrix: diff --git a/.github/workflows/update-ops-docs.yml b/.github/workflows/update-ops-docs.yml new file mode 100644 index 0000000000000..c0218fa742173 --- /dev/null +++ b/.github/workflows/update-ops-docs.yml @@ -0,0 +1,40 @@ +name: Update Operations Documentation + +on: + push: + paths: + - 'docs/ops/**' + - 'scripts/create_ops_docs.py' + pull_request: + paths: + - 'docs/ops/**' + - 'scripts/create_ops_docs.py' + +jobs: + update-ops-docs: + runs-on: ubuntu-latest + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + + - name: Generate operations documentation to temporary file + run: | + mkdir -p /tmp/ops_check + ./scripts/create_ops_docs.py /tmp/ops_check/ops.md + + - name: Check if docs/ops.md matches generated version + run: | + if ! diff -q docs/ops.md /tmp/ops_check/ops.md; then + echo "Operations documentation (docs/ops.md) is not up to date with the backend CSV files." + echo "To fix: run ./scripts/create_ops_docs.py and commit the updated docs/ops.md along with your changes" + echo "Differences found:" + diff docs/ops.md /tmp/ops_check/ops.md || true + exit 1 + fi + echo "Operations documentation is up to date." 
diff --git a/.gitmodules b/.gitmodules index 23ce5ff059b1b..e69de29bb2d1d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "kompute"] - path = ggml/src/ggml-kompute/kompute - url = https://github.com/nomic-ai/kompute.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 50801cdc637bd..c79ccd09e097c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -95,7 +95,7 @@ endif() if (NOT DEFINED LLAMA_BUILD_COMMIT) set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT}) endif() -set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER}) +set(LLAMA_INSTALL_VERSION 0.0.${LLAMA_BUILD_NUMBER}) # override ggml options set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS}) @@ -120,7 +120,6 @@ endfunction() llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA) llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA) -llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE) llama_option_depr(WARNING LLAMA_METAL GGML_METAL) llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY) llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE) diff --git a/CMakePresets.json b/CMakePresets.json index e9844701304fc..b5afeb3c0f2f9 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -55,6 +55,17 @@ "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" } }, + { + "name": "x64-linux-gcc", "hidden": true, + "cacheVariables": { + "CMAKE_C_COMPILER": "gcc", + "CMAKE_CXX_COMPILER": "g++" + } + }, + { "name": "x64-linux-gcc-debug", "inherits": [ "base", "x64-linux-gcc", "debug" ] }, + { "name": "x64-linux-gcc-release", "inherits": [ "base", "x64-linux-gcc", "release" ] }, + { "name": "x64-linux-gcc-reldbg", "inherits": [ "base", "x64-linux-gcc", "reldbg" ] }, + { "name": "x64-linux-gcc+static-release", "inherits": [ "base", "x64-linux-gcc", "release", "static" ] }, { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] }, { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] }, diff --git a/README.md b/README.md 
index 90c7364dfcba0..edde61238cb5f 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ [![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases) [![Server](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggml-org/llama.cpp/actions/workflows/server.yml) -[Roadmap](https://github.com/users/ggerganov/projects/7) / [Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml) +[Manifesto](https://github.com/ggml-org/llama.cpp/discussions/205) / [ggml](https://github.com/ggml-org/ggml) / [ops](https://github.com/ggml-org/llama.cpp/blob/master/docs/ops.md) -Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ +LLM inference in C/C++ ## Recent API changes @@ -17,10 +17,9 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) ## Hot topics -- 🔥 Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md) -- A new binary `llama-mtmd-cli` is introduced to replace `llava-cli`, `minicpmv-cli`, `gemma3-cli` ([#13012](https://github.com/ggml-org/llama.cpp/pull/13012)) and `qwen2vl-cli` ([#13141](https://github.com/ggml-org/llama.cpp/pull/13141)), `libllava` will be deprecated +- Hot PRs: [All](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+) | [Open](https://github.com/ggml-org/llama.cpp/pulls?q=is%3Apr+label%3Ahot+is%3Aopen) +- Multimodal support arrived in `llama-server`: [#12898](https://github.com/ggml-org/llama.cpp/pull/12898) | [documentation](./docs/multimodal.md) - VS Code extension for FIM completions: https://github.com/ggml-org/llama.vscode -- Universal [tool call support](./docs/function-calling.md) in `llama-server` https://github.com/ggml-org/llama.cpp/pull/9639 - Vim/Neovim plugin for FIM completions: 
https://github.com/ggml-org/llama.vim - Introducing GGUF-my-LoRA https://github.com/ggml-org/llama.cpp/discussions/10123 - Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggml-org/llama.cpp/discussions/9669 @@ -134,6 +133,7 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo - [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct) - [X] [Trillion-7B-preview](https://huggingface.co/trillionlabs/Trillion-7B-preview) - [x] [Ling models](https://huggingface.co/collections/inclusionAI/ling-67c51c85b34a7ea0aba94c32) +- [x] [LFM2 models](https://huggingface.co/collections/LiquidAI/lfm2-686d721927015b2ad73eaa38) #### Multimodal @@ -269,6 +269,8 @@ Instructions for adding support for new models: [HOWTO-add-model.md](docs/develo | [Vulkan](docs/build.md#vulkan) | GPU | | [CANN](docs/build.md#cann) | Ascend NPU | | [OpenCL](docs/backend/OPENCL.md) | Adreno GPU | +| [WebGPU [In Progress]](docs/build.md#webgpu) | All | + | [RPC](https://github.com/ggml-org/llama.cpp/tree/master/tools/rpc) | All | ## Obtaining and quantizing models diff --git a/build-xcframework.sh b/build-xcframework.sh index a08419a801b47..f813984db9dbd 100755 --- a/build-xcframework.sh +++ b/build-xcframework.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Options IOS_MIN_OS_VERSION=16.4 diff --git a/ci/run.sh b/ci/run.sh index 2968a7dd48d42..4d3abf9232212 100755 --- a/ci/run.sh +++ b/ci/run.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # sample usage: # @@ -16,6 +16,9 @@ # # with VULKAN support # GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt # +# # with WebGPU support +# GG_BUILD_WEBGPU=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt +# # # with MUSA support # GG_BUILD_MUSA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt # @@ -39,7 +42,7 @@ sd=`dirname $0` cd $sd/../ SRC=`pwd` -CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=OFF" +CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON" if [ 
! -z ${GG_BUILD_METAL} ]; then CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON" @@ -81,6 +84,10 @@ if [ ! -z ${GG_BUILD_VULKAN} ]; then CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1" fi +if [ ! -z ${GG_BUILD_WEBGPU} ]; then + CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_WEBGPU=1" +fi + if [ ! -z ${GG_BUILD_MUSA} ]; then # Use qy1 by default (MTT S80) MUSA_ARCH=${MUSA_ARCH:-21} @@ -779,7 +786,7 @@ function gg_run_rerank_tiny { model_f16="${path_models}/ggml-model-f16.gguf" # for this model, the SEP token is "" - (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log + (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?\thi\nwhat is panda?\tit's a bear\nwhat is panda?\tThe giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." 
-ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log # sample output # rerank score 0: 0.029 diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index f43a630c900ff..0ae4d698f080c 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -86,8 +86,7 @@ if (LLAMA_CURL) endif() target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) include_directories(${CURL_INCLUDE_DIRS}) - find_library(CURL_LIBRARY curl REQUIRED) - set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) + set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARIES}) endif () if (LLAMA_LLGUIDANCE) @@ -112,13 +111,13 @@ if (LLAMA_LLGUIDANCE) ExternalProject_Add(llguidance_ext GIT_REPOSITORY https://github.com/guidance-ai/llguidance - # v0.7.20 (+ fix to build on GCC 15): - GIT_TAG b5b8b64dba11c4e4ee6b1d1450d3a3ae279891e8 + # v1.0.1: + GIT_TAG d795912fedc7d393de740177ea9ea761e7905774 PREFIX ${CMAKE_BINARY_DIR}/llguidance SOURCE_DIR ${LLGUIDANCE_SRC} BUILD_IN_SOURCE TRUE CONFIGURE_COMMAND "" - BUILD_COMMAND cargo build --release + BUILD_COMMAND cargo build --release --package llguidance INSTALL_COMMAND "" BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/${LLGUIDANCE_LIB_NAME} ${LLGUIDANCE_PATH}/llguidance.h UPDATE_COMMAND "" diff --git a/common/arg.cpp b/common/arg.cpp index 0d0daa3610105..c1151f51da17b 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -988,10 +988,6 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context params.tensor_buft_overrides.push_back({nullptr, nullptr}); } - if (params.reranking && params.embedding) { - throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both"); - } - if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) { throw std::runtime_error(string_format( "error: the supplied chat template is not supported: %s%s\n", @@ -1468,6 +1464,14 @@ common_params_context 
common_params_parser_init(common_params & params, llama_ex params.swa_full = true; } ).set_env("LLAMA_ARG_SWA_FULL")); + add_opt(common_arg( + {"--kv-unified", "-kvu"}, + string_format("use single unified KV buffer for the KV cache of all sequences (default: %s)\n" + "[(more info)](https://github.com/ggml-org/llama.cpp/pull/14363)", params.kv_unified ? "true" : "false"), + [](common_params & params) { + params.kv_unified = true; + } + ).set_env("LLAMA_ARG_KV_SPLIT")); add_opt(common_arg( {"--no-context-shift"}, string_format("disables context shift on infinite text generation (default: %s)", params.ctx_shift ? "disabled" : "enabled"), @@ -2710,6 +2714,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.embd_sep = value; } ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); + add_opt(common_arg( + {"--cls-separator"}, "STRING", + "separator of classification sequences (default \\t) for example \"<#seq#>\"", + [](common_params & params, const std::string & value) { + params.cls_sep = value; + } + ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); add_opt(common_arg( {"--host"}, "HOST", string_format("ip address to listen, or bind to an UNIX socket if the address ends with .sock (default: %s)", params.hostname.c_str()), @@ -2731,6 +2742,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.public_path = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH")); + add_opt(common_arg( + {"--api-prefix"}, "PREFIX", + string_format("prefix path the server serves from, without the trailing slash (default: %s)", params.api_prefix.c_str()), + [](common_params & params, const std::string & value) { + params.api_prefix = value; + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX")); add_opt(common_arg( {"--no-webui"}, string_format("Disable the Web UI (default: %s)", params.webui ? 
"enabled" : "disabled"), @@ -2747,9 +2765,10 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS")); add_opt(common_arg( {"--reranking", "--rerank"}, - string_format("enable reranking endpoint on server (default: %s)", params.reranking ? "enabled" : "disabled"), + string_format("enable reranking endpoint on server (default: %s)", "disabled"), [](common_params & params) { - params.reranking = true; + params.embedding = true; + params.pooling_type = LLAMA_POOLING_TYPE_RANK; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING")); add_opt(common_arg( @@ -2790,6 +2809,16 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.ssl_file_cert = value; } ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE")); + add_opt(common_arg( + {"--chat-template-kwargs"}, "STRING", + string_format("sets additional params for the json template parser"), + [](common_params & params, const std::string & value) { + auto parsed = json::parse(value); + for (const auto & item : parsed.items()) { + params.default_template_kwargs[item.key()] = item.value().dump(); + } + } + ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_CHAT_TEMPLATE_KWARGS")); add_opt(common_arg( {"-to", "--timeout"}, "N", string_format("server read/write timeout in seconds (default: %d)", params.timeout_read), @@ -3213,6 +3242,32 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.speculative.model.path = value; } ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT")); + add_opt(common_arg( + {"-ctkd", "--cache-type-k-draft"}, "TYPE", + string_format( + "KV cache data type for K for the draft model\n" + "allowed values: %s\n" + "(default: %s)", + get_all_kv_cache_types().c_str(), + ggml_type_name(params.speculative.cache_type_k) + ), + [](common_params & params, 
const std::string & value) { + params.speculative.cache_type_k = kv_cache_type_from_str(value); + } + ).set_env("LLAMA_ARG_CACHE_TYPE_K_DRAFT")); + add_opt(common_arg( + {"-ctvd", "--cache-type-v-draft"}, "TYPE", + string_format( + "KV cache data type for V for the draft model\n" + "allowed values: %s\n" + "(default: %s)", + get_all_kv_cache_types().c_str(), + ggml_type_name(params.speculative.cache_type_v) + ), + [](common_params & params, const std::string & value) { + params.speculative.cache_type_v = kv_cache_type_from_str(value); + } + ).set_env("LLAMA_ARG_CACHE_TYPE_V_DRAFT")); add_opt(common_arg( {"-mv", "--model-vocoder"}, "FNAME", @@ -3376,5 +3431,34 @@ common_params_context common_params_parser_init(common_params & params, llama_ex } ).set_examples({LLAMA_EXAMPLE_SERVER})); + // diffusion parameters + add_opt(common_arg( + { "--diffusion-steps" }, "N", + string_format("number of diffusion steps (default: %d)", params.diffusion.steps), + [](common_params & params, int value) { params.diffusion.steps = value; } + ).set_examples({ LLAMA_EXAMPLE_DIFFUSION })); + add_opt(common_arg( + { "--diffusion-eps" }, "F", + string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps), + [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); } + ).set_examples({ LLAMA_EXAMPLE_DIFFUSION })); + add_opt(common_arg( + { "--diffusion-algorithm" }, "N", + string_format("diffusion algorithm: 0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY (default: %d)", + params.diffusion.algorithm), + [](common_params & params, int value) { params.diffusion.algorithm = value; } + ).set_examples({ LLAMA_EXAMPLE_DIFFUSION })); + add_opt(common_arg( + { "--diffusion-alg-temp" }, "F", + string_format("algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp), + [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); } + ).set_examples({ LLAMA_EXAMPLE_DIFFUSION })); 
+ add_opt(common_arg( + { "--diffusion-visual" }, + string_format("enable visual diffusion mode (show progressive generation) (default: %s)", + params.diffusion.visual_mode ? "true" : "false"), + [](common_params & params) { params.diffusion.visual_mode = true; } + ).set_examples({ LLAMA_EXAMPLE_DIFFUSION })); + return ctx_arg; } diff --git a/common/chat.cpp b/common/chat.cpp index 0dad14fba9ba5..114dbfccdbfe7 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -17,6 +17,8 @@ #include #include +using json = nlohmann::ordered_json; + static std::string format_time(const std::chrono::system_clock::time_point & now, const std::string & format) { auto time = std::chrono::system_clock::to_time_t(now); auto local_time = *std::localtime(&time); @@ -140,6 +142,7 @@ struct templates_params { bool add_generation_prompt = true; bool enable_thinking = true; std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + json extra_context; }; common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) { @@ -720,16 +723,23 @@ static void foreach_function(const json & tools, const std::function & messages_override = std::nullopt, + const std::optional & tools_override = std::nullopt, + const std::optional & additional_context = std::nullopt) { minja::chat_template_inputs tmpl_inputs; - tmpl_inputs.messages = messages; - tmpl_inputs.tools = tools; - tmpl_inputs.add_generation_prompt = add_generation_prompt; - tmpl_inputs.extra_context = extra_context; + tmpl_inputs.messages = messages_override ? *messages_override : inputs.messages; + if (tools_override) { + tmpl_inputs.tools = *tools_override; + } else { + tmpl_inputs.tools = inputs.tools.empty() ? 
json() : inputs.tools; + } + tmpl_inputs.add_generation_prompt = inputs.add_generation_prompt; + tmpl_inputs.extra_context = inputs.extra_context; + if (additional_context) { + tmpl_inputs.extra_context.merge_patch(*additional_context); + } // TODO: add flag to control date/time, if only for testing purposes. // tmpl_inputs.now = std::chrono::system_clock::now(); @@ -828,7 +838,7 @@ static common_chat_params common_chat_params_init_generic(const common_chat_temp inputs.messages, "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request"); - data.prompt = apply(tmpl, tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.prompt = apply(tmpl, inputs, /* messages_override= */ tweaked_messages); data.format = COMMON_CHAT_FORMAT_GENERIC; return data; } @@ -904,7 +914,7 @@ static common_chat_params common_chat_params_init_mistral_nemo(const common_chat data.preserved_tokens = { "[TOOL_CALLS]", }; - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.prompt = apply(tmpl, inputs); data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO; return data; } @@ -934,7 +944,7 @@ static common_chat_params common_chat_params_init_command_r7b(const common_chat_ adjusted_messages.push_back(msg); } } - data.prompt = apply(tmpl, adjusted_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {}); + data.prompt = apply(tmpl, inputs, /* messages_override= */ adjusted_messages); data.format = COMMON_CHAT_FORMAT_COMMAND_R7B; if (string_ends_with(data.prompt, "<|START_THINKING|>")) { if (!inputs.enable_thinking) { @@ -1122,7 +1132,7 @@ static common_chat_params common_chat_params_init_llama_3_x(const common_chat_te } else { data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; } - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt, { + data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, json { {"date_string", format_time(inputs.now, "%d %b %Y")}, {"tools_in_user_message", false}, {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools}, @@ -1187,7 +1197,7 @@ static void common_chat_parse_llama_3_1(common_chat_msg_parser & builder, bool w static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; - auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + auto prompt = apply(tmpl, inputs); // Hacks to fix the official (broken) prompt. // It is advisable to use --chat-template-file models/templates/llama-cpp-deepseek-r1.jinja instead, @@ -1282,7 +1292,7 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) { static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) { LOG_DBG("%s\n", __func__); common_chat_params data; - data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, { + data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ json(), json { {"datetime", format_time(inputs.now, "%b %d %Y %H:%M:%S GMT")}, {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))}, }); @@ -1338,7 +1348,7 @@ static common_chat_params common_chat_params_init_functionary_v3_2(const common_ // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar // If the function is python, we also allow raw python code (if the line after `python\n` doesn't start w/ opening `{`), which the model seems to prefer for multiline code. common_chat_params data; - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.prompt = apply(tmpl, inputs); data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; if (inputs.tools.is_array() && !inputs.tools.empty()) { data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED; @@ -1465,7 +1475,7 @@ static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(con data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; } - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); + data.prompt = apply(tmpl, inputs); // TODO: if (has_raw_python) return data; } @@ -1498,14 +1508,15 @@ static void common_chat_parse_functionary_v3_1_llama_3_1(common_chat_msg_parser static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; - json additional_context = { + json extra_context = json { {"enable_thinking", inputs.enable_thinking}, }; + extra_context.update(inputs.extra_context); - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, additional_context); + data.prompt = apply(tmpl, inputs, /* messages_override =*/ std::nullopt, /* tools_override= */ std::nullopt, extra_context); data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO; if (string_ends_with(data.prompt, "\n")) { - if (!inputs.enable_thinking) { + if (!extra_context["enable_thinking"]) { data.prompt += ""; } else { data.thinking_forced_open = true; @@ -1691,7 +1702,7 @@ static void common_chat_parse_hermes_2_pro(common_chat_msg_parser & builder) { static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct templates_params & inputs) { common_chat_params data; - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); + data.prompt = apply(tmpl, inputs); data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; data.grammar_lazy = false; if (!inputs.json_schema.is_null()) { @@ -1722,6 +1733,12 @@ static common_chat_params common_chat_templates_apply_jinja( params.enable_thinking = inputs.enable_thinking; params.grammar = inputs.grammar; params.now = inputs.now; + + params.extra_context = json::object(); + for (auto el : inputs.chat_template_kwargs) { + params.extra_context[el.first] = json::parse(el.second); + } + if (!inputs.json_schema.empty()) { params.json_schema = json::parse(inputs.json_schema); } @@ -1838,7 +1855,7 @@ static common_chat_params common_chat_templates_apply_legacy( if (res < 0) { // if the custom "tmpl" is not supported, we throw an error // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template() - throw std::runtime_error("this custom template is not supported"); + throw std::runtime_error("this custom template is not supported, try using --jinja"); } // if it turns out that our buffer is too small, we resize it diff --git a/common/chat.h b/common/chat.h index 9f59e6b08738d..ca807c145ee82 100644 --- a/common/chat.h +++ b/common/chat.h @@ -7,6 +7,7 @@ #include #include #include +#include struct common_chat_templates; @@ -125,6 +126,7 @@ struct common_chat_templates_inputs { common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE; bool enable_thinking = true; std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); + std::map chat_template_kwargs; }; struct common_chat_params { diff --git a/common/common.cpp b/common/common.cpp index e23887c70770c..466271be61c63 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -706,11 +706,17 @@ bool fs_validate_filename(const std::string & filename) { // disable C++17 deprecation warning for std::codecvt_utf8 # pragma clang diagnostic push # pragma clang diagnostic 
ignored "-Wdeprecated-declarations" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif + std::wstring_convert, char32_t> converter; #if defined(__clang__) # pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop #endif filename_utf32 = converter.from_bytes(filename); @@ -767,6 +773,9 @@ bool fs_validate_filename(const std::string & filename) { return true; } +#include + + // returns true if successful, false otherwise bool fs_create_directory_with_parents(const std::string & path) { #ifdef _WIN32 @@ -784,9 +793,16 @@ bool fs_create_directory_with_parents(const std::string & path) { // process path from front to back, procedurally creating directories while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) { const std::wstring subpath = wpath.substr(0, pos_slash); - const wchar_t * test = subpath.c_str(); - const bool success = CreateDirectoryW(test, NULL); + pos_slash += 1; + + // skip the drive letter, in some systems it can return an access denied error + if (subpath.length() == 2 && subpath[1] == ':') { + continue; + } + + const bool success = CreateDirectoryW(subpath.c_str(), NULL); + if (!success) { const DWORD error = GetLastError(); @@ -800,8 +816,6 @@ bool fs_create_directory_with_parents(const std::string & path) { return false; } } - - pos_slash += 1; } return true; @@ -897,34 +911,6 @@ struct common_init_result common_init_from_params(common_params & params) { const llama_vocab * vocab = llama_model_get_vocab(model); - if (params.reranking) { - bool ok = true; - - if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) { - LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__); - ok = false; - } - - bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL; - bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL; - - if (!has_eos && !has_sep) { - LOG_WRN("%s: warning: vocab does not have an EOS token or SEP 
token, reranking will not work\n", __func__); - ok = false; - } else if (!has_eos) { - LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__); - } else if (!has_sep) { - LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__); - ok = false; - } - - if (!ok) { - llama_model_free(model); - - return iparams; - } - } - auto cparams = common_context_params_to_llama(params); llama_context * lctx = llama_init_from_model(model, cparams); @@ -966,6 +952,35 @@ struct common_init_result common_init_from_params(common_params & params) { } } + if (llama_pooling_type(lctx) == LLAMA_POOLING_TYPE_RANK) { + bool ok = true; + + if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) { + LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__); + ok = false; + } + + bool has_eos = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL; + bool has_sep = llama_vocab_sep(vocab) != LLAMA_TOKEN_NULL; + + if (!has_eos && !has_sep) { + LOG_WRN("%s: warning: vocab does not have an EOS token or SEP token, reranking will not work\n", __func__); + ok = false; + } else if (!has_eos) { + LOG_WRN("%s: warning: vocab does not have an EOS token, using SEP token as fallback\n", __func__); + } else if (!has_sep) { + LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__); + ok = false; + } + + if (!ok) { + llama_free(lctx); + llama_model_free(model); + + return iparams; + } + } + // load and optionally apply lora adapters for (auto & la : params.lora_adapters) { llama_adapter_lora_ptr lora; @@ -990,15 +1005,21 @@ struct common_init_result common_init_from_params(common_params & params) { params.sampling.ignore_eos = false; } - if (params.sampling.ignore_eos) { - for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) { - if (llama_vocab_is_eog(vocab, i)) { - LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY); - 
params.sampling.logit_bias.push_back({i, -INFINITY}); - } + // initialize once + for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) { + if (llama_vocab_is_eog(vocab, i)) { + LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY); + params.sampling.logit_bias_eog.push_back({i, -INFINITY}); } } + if (params.sampling.ignore_eos) { + // add EOG biases to the active set of logit biases + params.sampling.logit_bias.insert( + params.sampling.logit_bias.end(), + params.sampling.logit_bias_eog.begin(), params.sampling.logit_bias_eog.end()); + } + if (params.sampling.penalty_last_n == -1) { LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx)); params.sampling.penalty_last_n = llama_n_ctx(lctx); @@ -1142,11 +1163,7 @@ struct llama_context_params common_context_params_to_llama(const common_params & cparams.no_perf = params.no_perf; cparams.op_offload = !params.no_op_offload; cparams.swa_full = params.swa_full; - - if (params.reranking) { - cparams.embeddings = true; - cparams.pooling_type = LLAMA_POOLING_TYPE_RANK; - } + cparams.kv_unified = params.kv_unified; cparams.type_k = params.cache_type_k; cparams.type_v = params.cache_type_v; @@ -1280,6 +1297,9 @@ std::vector common_tokenize( int n_tokens = text.length() + 2 * add_special; std::vector result(n_tokens); n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); + if (n_tokens == std::numeric_limits::min()) { + throw std::runtime_error("Tokenization failed: input text too large, tokenization result exceeds int32_t limit"); + } if (n_tokens < 0) { result.resize(-n_tokens); int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); diff --git a/common/common.h b/common/common.h index f26724b6e1495..27adf552465e7 100644 --- a/common/common.h +++ b/common/common.h @@ -8,6 +8,7 @@ #include #include #include 
+#include #include #ifdef _WIN32 @@ -80,6 +81,7 @@ enum llama_example { LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_PARALLEL, LLAMA_EXAMPLE_TTS, + LLAMA_EXAMPLE_DIFFUSION, LLAMA_EXAMPLE_COUNT, }; @@ -176,7 +178,8 @@ struct common_params_sampling { std::vector grammar_triggers; // optional triggers (for lazy grammars) std::set preserved_tokens; - std::vector logit_bias; // logit biases to apply + std::vector logit_bias; // logit biases to apply + std::vector logit_bias_eog; // pre-calculated logit biases for EOG tokens // print the parameters into a string std::string print() const; @@ -199,6 +202,9 @@ struct common_params_speculative { float p_split = 0.1f; // speculative decoding split probability float p_min = 0.75f; // minimum speculative decoding probability (greedy) + ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K + ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V + struct cpu_params cpuparams; struct cpu_params cpuparams_batch; @@ -213,6 +219,14 @@ struct common_params_vocoder { bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT }; +struct common_params_diffusion { + int32_t steps = 64; // number of diffusion steps + float eps = 1e-3f; // epsilon for timesteps + int32_t algorithm = 0; // diffusion algorithm (0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY) + float alg_temp = 0.0f; // algorithm temperature + bool visual_mode = false; // show progressive diffusion on screen +}; + enum common_reasoning_format { COMMON_REASONING_FORMAT_NONE, COMMON_REASONING_FORMAT_DEEPSEEK_LEGACY, // Extract thinking tag contents and return as `message.reasoning_content`, or leave inline in tags in stream mode @@ -264,6 +278,7 @@ struct common_params { struct common_params_sampling sampling; struct common_params_speculative speculative; struct common_params_vocoder vocoder; + struct common_params_diffusion diffusion; struct common_params_model model; @@ -326,6 +341,7 @@ struct common_params { bool 
no_perf = false; // disable performance metrics bool ctx_shift = true; // context shift on inifinite text generation bool swa_full = false; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055) + bool kv_unified = false; // enable unified KV cache bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix bool use_mmap = true; // use mmap for faster loads @@ -355,7 +371,7 @@ struct common_params { int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm) std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix std::string embd_sep = "\n"; // separator of embeddings - bool reranking = false; // enable reranking support on server + std::string cls_sep = "\t"; // separator of classification sequences // server params int32_t port = 8080; // server listens on this network port @@ -366,6 +382,7 @@ struct common_params { std::string hostname = "127.0.0.1"; std::string public_path = ""; // NOLINT + std::string api_prefix = ""; // NOLINT std::string chat_template = ""; // NOLINT bool use_jinja = false; // NOLINT bool enable_chat_template = true; @@ -378,6 +395,8 @@ struct common_params { std::string ssl_file_key = ""; // NOLINT std::string ssl_file_cert = ""; // NOLINT + std::map default_template_kwargs; + // "advanced" endpoints are disabled by default for better security bool webui = true; bool endpoint_slots = false; diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index d38a74f95c213..637891f50699c 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -41,49 +41,6 @@ static std::string build_repetition(const std::string & item_rule, int min_items return result; } -/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */ -class string_view { 
- const std::string & _str; - const size_t _start; - const size_t _end; -public: - string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {} - - size_t size() const { - return _end - _start; - } - - size_t length() const { - return size(); - } - - operator std::string() const { - return str(); - } - - std::string str() const { - return _str.substr(_start, _end - _start); - } - - string_view substr(size_t pos, size_t len = std::string::npos) const { - return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len); - } - - char operator[](size_t pos) const { - auto index = _start + pos; - if (index >= _end) { - throw std::out_of_range("string_view index out of range"); - } - return _str[_start + pos]; - } - - bool operator==(const string_view & other) const { - std::string this_str = *this; - std::string other_str = other; - return this_str == other_str; - } -}; - static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { auto has_min = min_value != std::numeric_limits::min(); auto has_max = max_value != std::numeric_limits::max(); @@ -112,14 +69,14 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream & } out << "}"; }; - std::function uniform_range = - [&](const string_view & from, const string_view & to) { + std::function uniform_range = + [&](const std::string_view & from, const std::string_view & to) { size_t i = 0; while (i < from.length() && i < to.length() && from[i] == to[i]) { i++; } if (i > 0) { - out << "\"" << from.substr(0, i).str() << "\""; + out << "\"" << from.substr(0, i) << "\""; } if (i < from.length() && i < to.length()) { if (i > 0) { diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 173a103badc60..d9185c8060028 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -300,6 
+300,7 @@ def prepare_tensors(self): gguf.MODEL_TENSOR.POS_EMBD, gguf.MODEL_TENSOR.TOKEN_TYPES, gguf.MODEL_TENSOR.SSM_CONV1D, + gguf.MODEL_TENSOR.SHORTCONV_CONV, gguf.MODEL_TENSOR.TIME_MIX_FIRST, gguf.MODEL_TENSOR.TIME_MIX_W1, gguf.MODEL_TENSOR.TIME_MIX_W2, @@ -310,6 +311,8 @@ def prepare_tensors(self): gguf.MODEL_TENSOR.POSNET_NORM2, gguf.MODEL_TENSOR.V_ENC_EMBD_POS, gguf.MODEL_TENSOR.A_ENC_EMBD_POS, + gguf.MODEL_TENSOR.ALTUP_CORRECT_COEF, + gguf.MODEL_TENSOR.ALTUP_PREDICT_COEF, ) ) or not new_name.endswith(".weight") @@ -320,7 +323,11 @@ def prepare_tensors(self): self.match_model_tensor_name(new_name, key, bid) for key in ( gguf.MODEL_TENSOR.TOKEN_EMBD, + gguf.MODEL_TENSOR.PER_LAYER_TOKEN_EMBD, gguf.MODEL_TENSOR.OUTPUT, + gguf.MODEL_TENSOR.ALTUP_ROUTER, + gguf.MODEL_TENSOR.LAUREL_L, + gguf.MODEL_TENSOR.LAUREL_R, ) ): if self.ftype in ( @@ -519,7 +526,7 @@ def prepare_metadata(self, vocab_only: bool): def set_gguf_parameters(self): self.gguf_writer.add_block_count(self.block_count) - if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx", "n_positions"], optional=True)) is not None: + if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx", "n_positions", "max_length"], optional=True)) is not None: self.gguf_writer.add_context_length(n_ctx) logger.info(f"gguf: context length = {n_ctx}") @@ -556,11 +563,8 @@ def set_gguf_parameters(self): logger.info(f"gguf: experts used count = {n_experts_used}") if (head_dim := self.hparams.get("head_dim")) is not None: - # Workaround for incorrect AutoConfig value for DeepSeekV3 (is set correctly in DeepSeekV2Model class) - # https://github.com/huggingface/transformers/blob/19224c3642705c5b6988c9f5f4251f83323d05ae/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py#L210 - if self.hparams.get("model_type") != "deepseek_v3": - self.gguf_writer.add_key_length(head_dim) - self.gguf_writer.add_value_length(head_dim) + self.gguf_writer.add_key_length(head_dim) + 
self.gguf_writer.add_value_length(head_dim) self.gguf_writer.add_file_type(self.ftype) logger.info(f"gguf: file type = {self.ftype}") @@ -665,6 +669,36 @@ def get_vocab_base_pre(self, tokenizer) -> str: # NOTE: if you get an error here, you need to update the convert_hf_to_gguf_update.py script # or pull the latest version of the model from Huggingface # don't edit the hashes manually! + if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b": + # ref: https://huggingface.co/THUDM/glm-4-9b-chat + res = "chatglm-bpe" + if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516": + # ref: https://huggingface.co/THUDM/glm-4-9b-chat + res = "chatglm-bpe" + if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2": + # ref: https://huggingface.co/THUDM/glm-4-9b-hf + res = "glm4" + if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35": + # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0 + res = "minerva-7b" + if chkhsh == "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664": + # ref: https://huggingface.co/tencent/Hunyuan-A13B-Instruct + res = "hunyuan" + if chkhsh == "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6": + # ref: https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base + res = "falcon-h1" + if chkhsh == "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86": + # ref: https://huggingface.co/tiiuae/Falcon-H1-1B-Base + res = "falcon-h1" + if chkhsh == "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896": + # ref: https://huggingface.co/tiiuae/Falcon-H1-7B-Base + res = "falcon-h1" + if chkhsh == "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b": + # ref: https://huggingface.co/tiiuae/Falcon-H1-34B-Base + res = "falcon-h1" + if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890": + # ref: https://huggingface.co/moonshotai/Kimi-K2-Base + res = "kimi-k2" if chkhsh == 
"0ef9807a4087ebef797fc749390439009c3b9eda9ad1a097abbe738f486c01e5": # ref: https://huggingface.co/meta-llama/Meta-Llama-3-8B res = "llama-bpe" @@ -800,18 +834,15 @@ def get_vocab_base_pre(self, tokenizer) -> str: if chkhsh == "d5f1dd6f980fec569fb218a81a7658ac45fc56b38c5a0adeb1c232fbe04ef5ec": # ref: https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base res = "seed-coder" - if chkhsh == "b6e8e1518dc4305be2fe39c313ed643381c4da5db34a98f6a04c093f8afbe99b": - # ref: https://huggingface.co/THUDM/glm-4-9b-chat - res = "chatglm-bpe" - if chkhsh == "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516": - # ref: https://huggingface.co/THUDM/glm-4-9b-chat - res = "chatglm-bpe" - if chkhsh == "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2": - # ref: https://huggingface.co/THUDM/glm-4-9b-hf - res = "glm4" - if chkhsh == "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35": - # ref: https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0 - res = "minerva-7b" + if chkhsh == "b0a6b1c0bd5998ebd9df08611efde34a4ff03faed45ae09c43e6b31ebd4b94cf": + # ref: https://huggingface.co/skt/A.X-4.0 + res = "a.x-4.0" + if chkhsh == "f6791d196f87ce6b56a7d234be618e0d58f8cda3549416635b2bebcd22cd95c4": + # ref: https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct + res = "midm-2.0" + if chkhsh == "169bf0296a13c4d9b7672313f749eb36501d931022de052aad6e36f2bf34dd51": + # ref: https://huggingface.co/LiquidAI/LFM2-Tokenizer + res = "lfm2" if res is None: logger.warning("\n") @@ -924,13 +955,20 @@ def _create_vocab_sentencepiece(self): tokenizer = SentencePieceProcessor() tokenizer.LoadFromFile(str(tokenizer_path)) - vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size()) + vocab_size = self.find_hparam([ + "vocab_size_per_layer_input", # gemma3n + "vocab_size", + ], optional=True) or tokenizer.vocab_size() tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)] scores: list[float] = [-10000.0] * vocab_size 
toktypes: list[int] = [SentencePieceTokenTypes.UNUSED] * vocab_size for token_id in range(tokenizer.vocab_size()): + if token_id >= vocab_size: + logger.warning(f'ignore tokens from {token_id}: id is out of range, max={vocab_size - 1}') + break + piece = tokenizer.IdToPiece(token_id) text = piece.encode("utf-8") score = tokenizer.GetScore(token_id) @@ -1047,7 +1085,14 @@ def _set_vocab_rwkv_world(self): self.gguf_writer.add_token_list(tokens) self.gguf_writer.add_token_types(toktypes) special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False) - special_vocab.chat_template = "rwkv-world" + if special_vocab.chat_template is None: + template_path = Path(__file__).parent / "models" / "templates" / "llama-cpp-rwkv-world.jinja" + if template_path.is_file(): + with open(template_path, "r", encoding="utf-8") as f: + template = f.read() + else: + template = "rwkv-world" + special_vocab.chat_template = template # hack: Add '\n\n' as the EOT token to make it chat normally special_vocab._set_special_token("eot", 261) # hack: Override these as they have already been set (incorrectly) @@ -1901,9 +1946,7 @@ def set_gguf_parameters(self): hparams = self.hparams self.gguf_writer.add_vocab_size(hparams["vocab_size"]) - if "head_dim" in hparams: - rope_dim = hparams["head_dim"] - else: + if (rope_dim := hparams.get("head_dim")) is None: rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"] self.gguf_writer.add_rope_dimension_count(rope_dim) @@ -1985,7 +2028,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: if rope_scaling := self.find_hparam(["rope_scaling"], optional=True): if rope_scaling.get("rope_type", '').lower() == "llama3": base = self.hparams.get("rope_theta", 10000.0) - dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) + if (dim := self.hparams.get("head_dim")) is None: + dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"] freqs = 1.0 / (base ** 
(torch.arange(0, dim, 2, dtype=torch.float32) / dim)) factor = rope_scaling.get("factor", 8.0) @@ -2020,6 +2064,20 @@ def prepare_tensors(self): raise ValueError(f"Unprocessed experts: {experts}") +@ModelBase.register("ArceeForCausalLM") +class ArceeModel(LlamaModel): + model_arch = gguf.MODEL_ARCH.ARCEE + + def set_gguf_parameters(self): + super().set_gguf_parameters() + self._try_set_pooling_type() + rope_scaling = self.hparams.get("rope_scaling") or {} + if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling: + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) + self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"]) + self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"]) + + @ModelBase.register( "LlavaForConditionalGeneration", # pixtral "Mistral3ForConditionalGeneration", # mistral small 3.1 @@ -2135,7 +2193,6 @@ def __init__(self, *args, **kwargs): def set_vocab(self): self._set_vocab_gpt2() - self.gguf_writer.add_add_bos_token(True) def set_gguf_parameters(self): super().set_gguf_parameters() @@ -2184,7 +2241,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter name += ".weight" if "multi_modal_projector.linear_1" in name: # despite the name with number postfix, this is a single fully connected layer - return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)] + return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC] + '.weight', data_torch)] return [(self.map_tensor_name(name), data_torch)] return [] @@ -2307,9 +2364,7 @@ def set_gguf_parameters(self): hparams = self.hparams self.gguf_writer.add_vocab_size(hparams["vocab_size"]) - if "head_dim" in hparams: - rope_dim = hparams["head_dim"] - else: + if (rope_dim := hparams.get("head_dim")) is None: rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"] self.gguf_writer.add_rope_dimension_count(rope_dim) @@ -2349,7 +2404,8 @@ def 
generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: if rope_scaling := self.find_hparam(["rope_scaling"], optional=True): if rope_scaling.get("rope_type", '').lower() == "llama3": base = self.hparams.get("rope_theta", 10000.0) - dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) + if (dim := self.hparams.get("head_dim")) is None: + dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"] freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)) factor = rope_scaling.get("factor", 8.0) @@ -2722,6 +2778,210 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter yield from super().modify_tensors(data_torch, name, bid) +@ModelBase.register("DreamModel") +class DreamModel(TextModel): + model_arch = gguf.MODEL_ARCH.DREAM + + def get_vocab_base(self) -> tuple[list[str], list[int], str]: + tokens: list[str] = [] + toktypes: list[int] = [] + + from transformers import AutoTokenizer + tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True) + + vocab_dict = tokenizer.get_vocab() + vocab_size = self.hparams.get("vocab_size", len(vocab_dict)) + assert max(vocab_dict.values()) < vocab_size + + tokpre = self.get_vocab_base_pre(tokenizer) + + reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in vocab_dict.items()} + added_vocab = tokenizer.get_added_vocab() + + for i in range(vocab_size): + if i not in reverse_vocab: + tokens.append(f"[PAD{i}]") + toktypes.append(gguf.TokenType.UNUSED) + elif reverse_vocab[i] in added_vocab: + tokens.append(reverse_vocab[i]) + # Check if it's a special token - treat special tokens as CONTROL tokens + if hasattr(tokenizer, 'added_tokens_decoder') and i in tokenizer.added_tokens_decoder: + if tokenizer.added_tokens_decoder[i].special: + toktypes.append(gguf.TokenType.CONTROL) + else: + toktypes.append(gguf.TokenType.USER_DEFINED) + else: + # Fallback: treat all added vocab as control tokens for 
special tokens like <|im_start|> + toktypes.append(gguf.TokenType.CONTROL) + else: + tokens.append(reverse_vocab[i]) + toktypes.append(gguf.TokenType.NORMAL) + + return tokens, toktypes, tokpre + + def set_vocab(self): + try: + self._set_vocab_sentencepiece() + except FileNotFoundError: + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + self._try_set_pooling_type() + + # Dream models use non-causal attention for diffusion + self.gguf_writer.add_causal_attention(False) + # Handle RoPE scaling similar to Qwen2 + rope_scaling = self.hparams.get("rope_scaling") or {} + if rope_scaling.get("rope_type", rope_scaling.get("type")) == "yarn" and "factor" in rope_scaling: + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) + self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"]) + self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"]) + + # Add Dream-specific parameters + mask_token_id = self.hparams.get("mask_token_id") + if mask_token_id is not None: + self.gguf_writer.add_mask_token_id(mask_token_id) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + # Dream model tensors should be mapped directly since it's the base model + yield from super().modify_tensors(data_torch, name, bid) + + +@ModelBase.register("Ernie4_5_ForCausalLM") +class Ernie4_5Model(TextModel): + model_arch = gguf.MODEL_ARCH.ERNIE4_5 + + def set_vocab(self): + self._set_vocab_sentencepiece() + + def set_gguf_parameters(self): + super().set_gguf_parameters() + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + num_heads = self.hparams["num_attention_heads"] + num_kv_heads = self.hparams["num_key_value_heads"] + if (head_dim := self.hparams.get("head_dim")) is None: + head_dim = self.hparams["hidden_size"] // num_heads + + if "ernie." 
in name: + name = name.replace("ernie.", "model.") + # split the qkv weights + # qkv_proj shape: [(num_heads + 2 * num_kv_heads) * head_dim, hidden_size] + if "qkv_proj" in name: + name_q = name.replace("qkv_proj.weight", "q_proj.weight") + name_k = name.replace("qkv_proj.weight", "k_proj.weight") + name_v = name.replace("qkv_proj.weight", "v_proj.weight") + total_q_dim = num_heads * head_dim + total_k_dim = num_kv_heads * head_dim + total_v_dim = num_kv_heads * head_dim + q_proj_weight, k_proj_weight, v_proj_weight = data_torch.split([total_q_dim, total_k_dim, total_v_dim], dim=0) + return [ + (self.map_tensor_name(name_q), q_proj_weight), + (self.map_tensor_name(name_k), k_proj_weight), + (self.map_tensor_name(name_v), v_proj_weight) + ] + # split the up_gate_proj into gate and up + # up_gate_proj shape: [2 * intermediate_size, hidden_size] + if "up_gate_proj" in name: + name_up = name.replace("up_gate_proj.weight", "up_proj.weight") + name_gate = name.replace("up_gate_proj.weight", "gate_proj.weight") + dim_half = data_torch.shape[0] // 2 + gate_proj_weight, up_proj_weight = data_torch.split(dim_half, dim=0) + return [ + (self.map_tensor_name(name_gate), gate_proj_weight), + (self.map_tensor_name(name_up), up_proj_weight) + ] + return [(self.map_tensor_name(name), data_torch)] + + +@ModelBase.register("Ernie4_5_MoeForCausalLM") +class Ernie4_5MoeModel(Ernie4_5Model): + model_arch = gguf.MODEL_ARCH.ERNIE4_5_MOE + _experts: list[dict[str, Tensor]] | None = None + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._experts = [{} for _ in range(self.block_count)] + + def set_gguf_parameters(self): + super().set_gguf_parameters() + self.gguf_writer.add_expert_count(self.hparams["moe_num_experts"]) + self.gguf_writer.add_expert_used_count(self.hparams["moe_k"]) + self.gguf_writer.add_interleave_moe_layer_step(self.hparams["moe_layer_interval"]) + self.gguf_writer.add_leading_dense_block_count(self.hparams["moe_layer_start_index"]) + if 
(moe_intermediate_size := self.hparams.get("moe_intermediate_size")) is not None: + self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size) + if (shared_expert_count := self.hparams.get('moe_num_shared_experts')) is not None: + self.gguf_writer.add_expert_shared_count(shared_expert_count) + if shared_expert_count > 0 and (shared_expert_intermediate_size := self.hparams.get('intermediate_size')) is not None and (num_key_value_heads := self.hparams.get('num_key_value_heads')) is not None: + self.gguf_writer.add_expert_shared_feed_forward_length(shared_expert_intermediate_size // num_key_value_heads) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + # Modify correction bias name as in DeepseekV2 + if name.endswith("e_score_correction_bias"): + name = name.replace("e_score_correction_bias", "e_score_correction.bias") + + # skip Multi-Token Prediction (MTP) layers (again, same as DeepseekV2) + match = re.match(r"model.mtp_block.(\d+)", name) + if match: + return [] + + # skip all other MTP tensors for now + match = re.match(r"model.mtp_emb_norm.(\d+)", name) + if match: + return [] + + match = re.match(r"model.mtp_hidden_norm.(\d+)", name) + if match: + return [] + + match = re.match(r"model.mtp_linear_proj.(\d+)", name) + if match: + return [] + + # process the experts separately + if name.find("mlp.experts") != -1: + n_experts = self.hparams["moe_num_experts"] + assert bid is not None + + if self._experts is None: + self._experts = [{} for _ in range(self.block_count)] + + self._experts[bid][name] = data_torch + + if len(self._experts[bid]) >= n_experts * 3: + tensors: list[tuple[str, Tensor]] = [] + + # merge the experts into a single 3d tensor + for w_name in ["gate_proj", "up_proj", "down_proj"]: + datas: list[Tensor] = [] + + for xid in range(n_experts): + ename_to_retrieve = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight" + datas.append(self._experts[bid][ename_to_retrieve]) + 
del self._experts[bid][ename_to_retrieve] + + data_torch = torch.stack(datas, dim=0) + merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight" + new_name = self.map_tensor_name(merged_name) + tensors.append((new_name, data_torch)) + + return tensors + else: + return [] + return [(self.map_tensor_name(name), data_torch)] + + def prepare_tensors(self): + super().prepare_tensors() + + if self._experts is not None: + # flatten `list[dict[str, Tensor]]` into `list[str]` + experts = [k for d in self._experts for k in d.keys()] + if len(experts) > 0: + raise ValueError(f"Unprocessed experts: {experts}") + + @ModelBase.register( "Qwen2VLModel", "Qwen2VLForConditionalGeneration", @@ -3409,6 +3669,175 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(new_name, data_torch)] +@ModelBase.register("Plamo2ForCausalLM", "PLaMo2ForCausalLM") +class Plamo2Model(TextModel): + model_arch = gguf.MODEL_ARCH.PLAMO2 + + def set_vocab(self): + # PLaMo 2 uses a custom tokenizer with a .jsonl file + # We need to handle this specially + tokenizer_jsonl_path = self.dir_model / "tokenizer.jsonl" + tokenizer_config_path = self.dir_model / "tokenizer_config.json" + + if not tokenizer_jsonl_path.is_file(): + raise FileNotFoundError(f"PLaMo 2 tokenizer file not found: {tokenizer_jsonl_path}") + + # Load tokenizer config + with open(tokenizer_config_path, 'r', encoding='utf-8') as f: + tokenizer_config = json.load(f) + + # Load tokens from JSONL file (actually a list format) + tokens = [] + scores = [] + toktypes = [] + + with open(tokenizer_jsonl_path, 'r', encoding='utf-8') as f: + for line_num, line in enumerate(f): + if line.strip(): + token_data = json.loads(line) + # Format: [token, score, type, ?, ?, ?, ?] 
+ token = token_data[0].encode("utf-8") + score = float(token_data[1]) + token_type_str = token_data[2] if len(token_data) > 2 else "NORMAL" + + tokens.append(token) + scores.append(score) + + # Map token type strings to GGUF token types + if token_type_str == "UNKNOWN": + toktypes.append(gguf.TokenType.UNKNOWN) + elif token_type_str == "CONTROL": + toktypes.append(gguf.TokenType.CONTROL) + elif token_type_str == "BYTE": + toktypes.append(gguf.TokenType.BYTE) + else: + # Check for PLaMo-2 special tokens + token_str = token_data[0] + if token_str.startswith("<|plamo:") and token_str.endswith("|>"): + toktypes.append(gguf.TokenType.CONTROL) + else: + toktypes.append(gguf.TokenType.NORMAL) + + vocab_size = self.hparams["vocab_size"] + if vocab_size > len(tokens): + pad_count = vocab_size - len(tokens) + logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]") + for i in range(1, pad_count + 1): + tokens.append(bytes(f"[PAD{i}]", encoding="utf-8")) + scores.append(-1000.0) + toktypes.append(gguf.TokenType.UNUSED) + + # Use "plamo2" tokenizer type for PLaMo-2's custom Aho-Corasick tokenizer + self.gguf_writer.add_tokenizer_model("plamo2") + self.gguf_writer.add_tokenizer_pre("default") + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_scores(scores) + self.gguf_writer.add_token_types(toktypes) + + # Add special tokens from config + if "bos_token" in tokenizer_config and tokenizer_config["bos_token"] is not None: + token_id = tokens.index(tokenizer_config["bos_token"].encode("utf-8")) + self.gguf_writer.add_bos_token_id(token_id) + if "eos_token" in tokenizer_config and tokenizer_config["eos_token"] is not None: + token_id = tokens.index(tokenizer_config["eos_token"].encode("utf-8")) + self.gguf_writer.add_eos_token_id(token_id) + if "pad_token" in tokenizer_config and tokenizer_config["pad_token"] is not None: + token_id = tokens.index(tokenizer_config["pad_token"].encode("utf-8")) + 
self.gguf_writer.add_pad_token_id(token_id) + if "sep_token" in tokenizer_config and tokenizer_config["sep_token"] is not None: + token_id = tokens.index(tokenizer_config["sep_token"].encode("utf-8")) + self.gguf_writer.add_sep_token_id(token_id) + if "unk_token" in tokenizer_config and tokenizer_config["unk_token"] is not None: + token_id = tokens.index(tokenizer_config["unk_token"].encode("utf-8")) + self.gguf_writer.add_unk_token_id(token_id) + + # Add <|plamo:op|> as EOT to ensure appropriate end of generation + self.gguf_writer.add_eot_token_id(4) + + self.gguf_writer.add_add_space_prefix(False) + + def set_gguf_parameters(self): + hparams = self.hparams + block_count = hparams["num_hidden_layers"] + self.gguf_writer.add_vocab_size(self.hparams["vocab_size"]) + + # Which layers are Mamba layers + # PLaMo 2 uses mamba_step to indicate the pattern (e.g., 2 means every other layer) + # This logic matches modeling_plamo.py's is_mamba function + mamba_step = hparams.get("mamba_step", 2) + mamba_enabled = hparams.get("mamba_enabled", True) + mamba_layers = [] + + if mamba_enabled: + for i in range(block_count): + if block_count <= (mamba_step // 2): + # use attention in last layer + is_mamba = (i != block_count - 1) + else: + is_mamba = (i % mamba_step) != (mamba_step // 2) + if is_mamba: + mamba_layers.append(0) + else: + mamba_layers.append(hparams.get("num_key_value_heads", 4)) + + if mamba_layers: + self.gguf_writer.add_head_count_kv(mamba_layers) + + self.gguf_writer.add_context_length(hparams.get("max_position_embeddings", 2048)) + self.gguf_writer.add_embedding_length(hparams.get("hidden_size", 4096)) + self.gguf_writer.add_block_count(block_count) + self.gguf_writer.add_head_count(hparams.get("num_attention_heads", 32)) + self.gguf_writer.add_layer_norm_rms_eps(hparams.get("rms_norm_eps", 1e-06)) + self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1000000.0)) + + # Mamba parameters + 
self.gguf_writer.add_ssm_state_size(hparams.get("mamba_d_state", 64)) + self.gguf_writer.add_ssm_conv_kernel(hparams.get("mamba_d_conv", 4)) + self.gguf_writer.add_ssm_time_step_rank(hparams.get("mamba_num_heads", 64)) + intermediate_size = hparams.get("mamba_num_heads", 64) * hparams.get("hidden_size_per_head", 128) + self.gguf_writer.add_ssm_inner_size(intermediate_size) + self.gguf_writer.add_ssm_group_count(0) + + # MLP feed forward parameters (for attention layers) + self.gguf_writer.add_feed_forward_length(hparams.get("intermediate_size", 16384)) + self.gguf_writer.add_file_type(self.ftype) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + del bid # unused + + if name.endswith(".A_log"): + data_torch = -torch.exp(data_torch) + elif name.endswith(".dt_bias"): + name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias" + elif name.endswith(".dt_norm_weight"): + name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight" + elif name.endswith(".B_norm_weight"): + name = name.rpartition(".B_norm_weight")[0] + ".B_norm.weight" + elif name.endswith(".C_norm_weight"): + name = name.rpartition(".C_norm_weight")[0] + ".C_norm.weight" + elif name.endswith(".k_weight"): + name = name.rpartition(".k_weight")[0] + ".k.weight" + elif name.endswith(".q_weight"): + name = name.rpartition(".q_weight")[0] + ".q.weight" + elif name.endswith(".conv1d.weight"): + data_torch = torch.squeeze(data_torch) # remove (, 1, ) + assert data_torch.ndim == 2 + elif name.endswith(".pre_mixer_norm.weight"): + data_torch += 1.0 + elif name.endswith(".post_mixer_norm.weight"): + data_torch += 1.0 / 5 + elif name.endswith(".pre_mlp_norm.weight"): + data_torch += 1.0 + elif name.endswith(".post_mlp_norm.weight"): + data_torch += 1.0 / (5**1.5) + elif name.endswith(".norm.weight"): + data_torch += 1.0 + + new_name = self.map_tensor_name(name) + + return [(new_name, data_torch)] + + @ModelBase.register("CodeShellForCausalLM") class 
CodeShellModel(TextModel): model_arch = gguf.MODEL_ARCH.CODESHELL @@ -3667,9 +4096,7 @@ def set_gguf_parameters(self): hparams = self.hparams self.gguf_writer.add_vocab_size(hparams["vocab_size"]) - if "head_dim" in hparams: - rope_dim = hparams["head_dim"] - else: + if (rope_dim := hparams.get("head_dim")) is None: rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"] self.gguf_writer.add_rope_dimension_count(rope_dim) @@ -3911,9 +4338,6 @@ def _xlmroberta_set_vocab(self) -> None: special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) special_vocab.add_to_gguf(self.gguf_writer) - self.gguf_writer.add_add_bos_token(True) - self.gguf_writer.add_add_eos_token(True) - @ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification") class DistilBertModel(BertModel): @@ -3955,8 +4379,6 @@ def set_vocab(self): bpe_tok_path = self.dir_model / "tokenizer.json" if bpe_tok_path.exists(): self._set_vocab_gpt2() - self.gguf_writer.add_add_bos_token(True) - self.gguf_writer.add_add_eos_token(True) # we need this to validate the size of the token_type embeddings # though currently we are passing all zeros to the token_type embeddings @@ -4062,6 +4484,34 @@ def _is_tokenizer_xlmroberta(self) -> bool: raise ValueError(f"unknown tokenizer: {toktyp}") +@ModelBase.register("NeoBERT", "NeoBERTLMHead", "NeoBERTForSequenceClassification") +class NeoBert(BertModel): + model_arch = gguf.MODEL_ARCH.NEO_BERT + + def set_gguf_parameters(self): + super().set_gguf_parameters() + + # NeoBERT uses 2/3 of the intermediate size as feed forward length + self.gguf_writer.add_feed_forward_length(int(2 * self.hparams["intermediate_size"] / 3)) + self.gguf_writer.add_rope_freq_base(10000.0) # default value for NeoBERT + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE) + + f_rms_eps = self.hparams.get("norm_eps", 1e-6) # default value for NeoBERT + self.gguf_writer.add_layer_norm_rms_eps(f_rms_eps) + logger.info(f"gguf: 
rms norm epsilon = {f_rms_eps}") + + self.gguf_writer.add_pooling_type(gguf.PoolingType.CLS) # https://huggingface.co/chandar-lab/NeoBERT#how-to-use + + def modify_tensors(self, data_torch, name, bid): + if name.startswith("decoder."): + return [] + + if name.startswith("model."): + name = name[6:] + + return super().modify_tensors(data_torch, name, bid) + + @ModelBase.register("XLMRobertaModel", "XLMRobertaForSequenceClassification") class XLMRobertaModel(BertModel): model_arch = gguf.MODEL_ARCH.BERT @@ -4188,6 +4638,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter @ModelBase.register("Gemma3ForCausalLM", "Gemma3ForConditionalGeneration") class Gemma3Model(TextModel): model_arch = gguf.MODEL_ARCH.GEMMA3 + norm_shift = 1.0 # Gemma3RMSNorm adds 1.0 to the norm value def set_vocab(self): self._set_vocab_sentencepiece() @@ -4209,9 +4660,8 @@ def set_gguf_parameters(self): self.gguf_writer.add_value_length(hparams.get("head_dim", 256)) self.gguf_writer.add_file_type(self.ftype) self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1_000_000.0)) # for global layers - # both attn_logit_softcapping and final_logit_softcapping are removed in Gemma3 + # attn_logit_softcapping is removed in Gemma3 assert hparams.get("attn_logit_softcapping") is None - assert hparams.get("final_logit_softcapping") is None self.gguf_writer.add_sliding_window(hparams["sliding_window"]) self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", 4)) if hparams.get("rope_scaling") is not None: @@ -4223,7 +4673,7 @@ def set_gguf_parameters(self): def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: del bid # unused - if name.startswith("language_model."): + if "language_model." 
in name: name = name.replace("language_model.", "") elif name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \ @@ -4238,8 +4688,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter # ref code in Gemma3RMSNorm # output = output * (1.0 + self.weight.float()) + # note: this is not the case on gemma3n if name.endswith("norm.weight"): - data_torch = data_torch + 1 + data_torch = data_torch + self.norm_shift return [(self.map_tensor_name(name), data_torch)] @@ -4296,21 +4747,116 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [] # skip other tensors -@ModelBase.register("Starcoder2ForCausalLM") -class StarCoder2Model(TextModel): - model_arch = gguf.MODEL_ARCH.STARCODER2 +@ModelBase.register("Gemma3nForConditionalGeneration") +class Gemma3NModel(Gemma3Model): + model_arch = gguf.MODEL_ARCH.GEMMA3N + norm_shift = 0.0 # same value with Gemma3p5RMSNorm scale_shift on python code + _altup_proj: list[Tensor] = [] + _altup_unembd: list[Tensor] = [] -@ModelBase.register("Rwkv6ForCausalLM") -class Rwkv6Model(TextModel): - model_arch = gguf.MODEL_ARCH.RWKV6 + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + assert self.hparams["altup_num_inputs"] == 4, "Current conversion only supports 4 altup inputs" + self._altup_proj = [ + torch.Tensor(), # to be replaced + torch.Tensor(), # to be replaced + torch.Tensor(), # to be replaced + ] + self._altup_unembd = [ + torch.Tensor(), # to be replaced + torch.Tensor(), # to be replaced + torch.Tensor(), # to be replaced + ] def set_vocab(self): - self._set_vocab_rwkv_world() + super().set_vocab() def set_gguf_parameters(self): - block_count = self.hparams["num_hidden_layers"] - head_size = self.hparams["head_size"] + super().set_gguf_parameters() + self.gguf_writer.add_altup_active_idx(self.hparams["altup_active_idx"]) + self.gguf_writer.add_altup_num_inputs(self.hparams["altup_num_inputs"]) + 
self.gguf_writer.add_embedding_length_per_layer_input(self.hparams["hidden_size_per_layer_input"]) + self.gguf_writer.add_shared_kv_layers(self.hparams["num_kv_shared_layers"]) + + activation_sparsity_scale = [] + for s in self.hparams["activation_sparsity_pattern"]: + normal_dist = torch.distributions.normal.Normal(0, 1) + std_multiplier = normal_dist.icdf(torch.tensor(s, dtype=torch.float32)) + activation_sparsity_scale.append(std_multiplier.item()) + self.gguf_writer.add_activation_sparsity_scale(activation_sparsity_scale) + + sliding_window_pattern = [] + for t in self.hparams["layer_types"]: + sliding_window_pattern.append(t == "sliding_attention") + self.gguf_writer.add_sliding_window_pattern(sliding_window_pattern) + + def _stack_matrices(self, matrices: list[Tensor]) -> Tensor | None: + has_all = all(m.numel() > 0 for m in matrices) + if not has_all: + return None + else: + return torch.stack(matrices, dim=0) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + if name.endswith("_scale"): + name = name + ".weight" + + # TODO: implement self.prediction_coefs.weight.clamp_(...) + + if "language_model." not in name: + return [] # skip non-language model tensors + + if "altup_unembed_projections" in name: + data_torch = data_torch.to(device="cpu") + if ".0." in name: + self._altup_unembd[0] = data_torch + elif ".1." in name: + self._altup_unembd[1] = data_torch + elif ".2." in name: + self._altup_unembd[2] = data_torch + else: + raise ValueError(f"Unknown name: {name}") + out = self._stack_matrices(self._altup_unembd) + if out is not None: + return [(self.map_tensor_name("model.altup_unembed_projections.weight"), out)] + else: + return [] + + if "altup_projections" in name: + data_torch = data_torch.to(device="cpu") + if ".0." in name: + self._altup_proj[0] = data_torch + elif ".1." in name: + self._altup_proj[1] = data_torch + elif ".2." 
in name: + self._altup_proj[2] = data_torch + else: + raise ValueError(f"Unknown name: {name}") + out = self._stack_matrices(self._altup_proj) + if out is not None: + return [(self.map_tensor_name("model.altup_projections.weight"), out)] + else: + return [] + + return super().modify_tensors(data_torch, name, bid) + + +@ModelBase.register("Starcoder2ForCausalLM") +class StarCoder2Model(TextModel): + model_arch = gguf.MODEL_ARCH.STARCODER2 + + +@ModelBase.register("Rwkv6ForCausalLM") +class Rwkv6Model(TextModel): + model_arch = gguf.MODEL_ARCH.RWKV6 + + def set_vocab(self): + self._set_vocab_rwkv_world() + + def set_gguf_parameters(self): + block_count = self.hparams["num_hidden_layers"] + head_size = self.hparams["head_size"] hidden_size = self.hparams["hidden_size"] layer_norm_eps = self.hparams["layer_norm_epsilon"] rescale_every_n_layers = self.hparams["rescale_every"] @@ -4594,6 +5140,14 @@ def set_gguf_parameters(self): class MambaModel(TextModel): model_arch = gguf.MODEL_ARCH.MAMBA + def __init__(self, dir_model: Path, *args, **kwargs): + # Avoid using AutoConfig for hparams + hparams = kwargs.pop("hparams", None) + if hparams is None: + with open(dir_model / "config.json", "r", encoding="utf-8") as f: + hparams = json.load(f) + super().__init__(dir_model, *args, hparams=hparams, **kwargs) + def set_vocab(self): vocab_size = self.hparams["vocab_size"] # Round vocab size to next multiple of 8 @@ -4668,6 +5222,216 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter return [(new_name, data_torch)] +@ModelBase.register("Mamba2ForCausalLM") +class Mamba2Model(TextModel): + model_arch = gguf.MODEL_ARCH.MAMBA2 + + def __init__(self, dir_model: Path, *args, **kwargs): + # Avoid using AutoConfig for hparams + # It wrongly assumes all Mamba2 models are Mamba-Codestral-7B-v0.1 + hparams = kwargs.pop("hparams", None) + if hparams is None: + with open(dir_model / "config.json", "r", encoding="utf-8") as f: + hparams = json.load(f) + 
super().__init__(dir_model, *args, hparams=hparams, **kwargs) + self.d_model = self.find_hparam(["hidden_size", "d_model", "dim"]) + self.d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * self.d_model + self.n_group = self.find_hparam(["n_groups"], optional=True) or 1 + + def set_vocab(self): + vocab_size = self.hparams["vocab_size"] + # Round vocab size to next multiple of 16 + pad_vocab = self.hparams.get("pad_vocab_size_multiple", 16) + # pad using ceiling division + # ref: https://stackoverflow.com/a/17511341/22827863 + vocab_size = -(vocab_size // -pad_vocab) * pad_vocab + self.hparams["vocab_size"] = vocab_size + + if (self.dir_model / "tokenizer.model").is_file(): + self._set_vocab_sentencepiece() + elif (self.dir_model / "tokenizer.model.v3").is_file(): + # mamba-codestral + raise NotImplementedError(f"Please rename {self.dir_model / 'tokenizer.model.v3'} to {self.dir_model / 'tokenizer.model'}") + elif (self.dir_model / "tokenizer.json").is_file(): + self._set_vocab_gpt2() + else: + # Use the GPT-NeoX tokenizer when no tokenizer files are present + self._set_vocab_builtin("gpt-neox", vocab_size) + + def set_gguf_parameters(self): + d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4 + d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 128 + head_dim = self.find_hparam(["mamba_d_head", "head_dim"], optional=True) or 64 + + rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5 + + # Fail early for models which don't have a block expansion factor of 2 + # TODO: does this really matter? 
+ # skip the assertion for FalconH1 Model + if self.model_arch != gguf.MODEL_ARCH.FALCON_H1: + assert self.d_inner == 2 * self.d_model + assert self.d_inner % head_dim == 0 + + self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default + self.gguf_writer.add_embedding_length(self.d_model) + self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading + self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading + self.gguf_writer.add_block_count(self.block_count) + self.gguf_writer.add_ssm_conv_kernel(d_conv) + self.gguf_writer.add_ssm_inner_size(self.d_inner) + self.gguf_writer.add_ssm_state_size(d_state) + self.gguf_writer.add_ssm_time_step_rank(self.d_inner // head_dim) + self.gguf_writer.add_ssm_group_count(self.n_group) + self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps) + self.gguf_writer.add_file_type(self.ftype) + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + + if name.startswith("model.backbone") or name.startswith("model.lm_head"): + # map Mamba-Codestral-7B-v0.1 tensor names to the names used by Mamba-2 + name = name.removeprefix("model.") + + if name.endswith(".dt_bias"): + name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias" + + new_name = self.map_tensor_name(name) + + if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid): + data_torch = data_torch.squeeze() + elif any(self.match_model_tensor_name(new_name, t, bid, suffix="") for t in [ + gguf.MODEL_TENSOR.SSM_A, + gguf.MODEL_TENSOR.SSM_D, + ]): + # unsqueeze A to use similar shape semantics as Mamba-1 + # (D is also unsqueezed, but for more straightforward broadcast internally) + data_torch = data_torch.reshape((*data_torch.shape, 1)) + elif self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_NORM, bid): + data_torch = data_torch.reshape((self.n_group, self.d_inner // self.n_group)) + + if name.endswith(".A_log"): + 
logger.debug("A_log --> A ==> " + new_name) + data_torch = -torch.exp(data_torch) + + yield (new_name, data_torch) + + +@ModelBase.register("JambaForCausalLM") +class JambaModel(TextModel): + model_arch = gguf.MODEL_ARCH.JAMBA + + def get_vocab_base_pre(self, tokenizer) -> str: + del tokenizer # unused + + return "gpt-2" + + def set_vocab(self): + if (self.dir_model / "tokenizer.model").is_file(): + # Using Jamba's tokenizer.json causes errors on model load + # (something about "byte not found in vocab"), + # but there's a working tokenizer.model + self._set_vocab_sentencepiece() + else: + # Some Jamba models only have a tokenizer.json, which works. + self._set_vocab_gpt2() + + def set_gguf_parameters(self): + d_model = self.find_hparam(["hidden_size", "mamba_d_model"]) + d_conv = self.find_hparam(["mamba_d_conv"], optional=True) or 4 + d_inner = self.hparams["mamba_expand"] * d_model + d_state = self.find_hparam(["mamba_d_state"], optional=True) or 16 + # ceiling division + # ref: https://stackoverflow.com/a/17511341/22827863 + # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58 + dt_rank = self.find_hparam(["mamba_dt_rank"], optional=True) or -(d_model // -16) + rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-6 + n_kv_head = self.hparams["num_key_value_heads"] + attn_offset = self.hparams["attn_layer_offset"] + attn_period = self.hparams["attn_layer_period"] + n_kv_vec = [0 for _ in range(attn_offset)] + [ + n_kv_head if (i - attn_offset) % attn_period == 0 else 0 for i in range(attn_offset, self.block_count) + ] + + self.gguf_writer.add_block_count(self.block_count) + self.gguf_writer.add_context_length(self.find_hparam(["max_position_embeddings", "n_ctx"])) + self.gguf_writer.add_embedding_length(d_model) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) + 
self.gguf_writer.add_head_count(self.hparams["num_attention_heads"]) + self.gguf_writer.add_head_count_kv(n_kv_vec) + self.gguf_writer.add_ssm_conv_kernel(d_conv) + self.gguf_writer.add_ssm_inner_size(d_inner) + self.gguf_writer.add_ssm_state_size(d_state) + self.gguf_writer.add_ssm_time_step_rank(dt_rank) + self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps) + self.gguf_writer.add_expert_count(self.hparams["num_experts"]) + self.gguf_writer.add_expert_used_count(self.hparams["num_experts_per_tok"]) + self.gguf_writer.add_file_type(self.ftype) + + _experts: list[dict[str, Tensor]] | None = None + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + + # Mini-Jamba + name = name.replace(".moe.", ".feed_forward.") + if bid is not None: + moe_offset = self.hparams["expert_layer_offset"] + moe_period = self.hparams["expert_layer_period"] + + if not (bid >= moe_offset and (bid - moe_offset) % moe_period == 0): + name = name.replace(".experts.0.", ".") + + # process the experts separately + if ".feed_forward.experts." 
in name: + n_experts = self.hparams["num_experts"] + + assert bid is not None + + if self._experts is None: + self._experts = [{} for _ in range(self.block_count)] + + self._experts[bid][name] = data_torch + + if len(self._experts[bid]) >= n_experts * 3: + + # merge the experts into a single 3d tensor + for wid in ["down_proj", "gate_proj", "up_proj"]: + datas: list[Tensor] = [] + + for xid in range(n_experts): + ename = f"model.layers.{bid}.feed_forward.experts.{xid}.{wid}.weight" + datas.append(self._experts[bid][ename]) + del self._experts[bid][ename] + + data_torch = torch.stack(datas, dim=0) + + # using the same merged name as qwen2moe + merged_name = f"model.layers.{bid}.mlp.experts.{wid}.weight" + + new_name = self.map_tensor_name(merged_name) + + yield new_name, data_torch + return + + new_name = self.map_tensor_name(name) + + if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid): + data_torch = data_torch.squeeze() + + if name.endswith(".A_log"): + logger.debug("A_log --> A ==> " + new_name) + data_torch = -torch.exp(data_torch) + + yield (new_name, data_torch) + + def prepare_tensors(self): + super().prepare_tensors() + + if self._experts is not None: + # flatten `list[dict[str, Tensor]]` into `list[str]` + experts = [k for d in self._experts for k in d.keys()] + if len(experts) > 0: + raise ValueError(f"Unprocessed experts: {experts}") + + @ModelBase.register("CohereForCausalLM") class CommandR2Model(TextModel): model_arch = gguf.MODEL_ARCH.COMMAND_R @@ -4813,8 +5577,6 @@ def set_vocab(self): self.gguf_writer.add_token_type_count(2) else: raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel') - self.gguf_writer.add_add_bos_token(True) - self.gguf_writer.add_add_eos_token(True) @ModelBase.register("OpenELMForCausalLM") @@ -5056,9 +5818,7 @@ def set_vocab(self): def set_gguf_parameters(self): super().set_gguf_parameters() hparams = self.hparams - if "head_dim" in hparams: - rope_dim = 
hparams["head_dim"] - else: + if (rope_dim := hparams.get("head_dim")) is None: rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"] self.gguf_writer.add_rope_dimension_count(rope_dim) @@ -5140,7 +5900,58 @@ class DeepseekV2Model(TextModel): model_arch = gguf.MODEL_ARCH.DEEPSEEK2 def set_vocab(self): - self._set_vocab_gpt2() + try: + self._set_vocab_gpt2() + return + except Exception: + pass + + from transformers import AutoTokenizer + tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True) + tokpre = self.get_vocab_base_pre(tokenizer) + + if tokpre == "kimi-k2": + # Build merges list using the approach similar to HunYuanMoE + merges = [] + vocab = {} + mergeable_ranks = tokenizer.model._mergeable_ranks + for token, rank in mergeable_ranks.items(): + vocab[QwenModel.token_bytes_to_string(token)] = rank + if len(token) == 1: + continue + merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank) + if len(merged) == 2: + merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged))) + + # Build token list + vocab_size = self.hparams["vocab_size"] + special_tokens = tokenizer.special_tokens + reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()} + tokens: list[str] = [] + toktypes: list[int] = [] + + for i in range(vocab_size): + if i not in reverse_vocab: + tokens.append(f"[PAD{i}]") + toktypes.append(gguf.TokenType.UNUSED) + else: + token = reverse_vocab[i] + tokens.append(token) + if i in special_tokens.values(): + toktypes.append(gguf.TokenType.CONTROL) + else: + toktypes.append(gguf.TokenType.NORMAL) + + self.gguf_writer.add_tokenizer_model("gpt2") + self.gguf_writer.add_tokenizer_pre(tokpre) + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_types(toktypes) + self.gguf_writer.add_token_merges(merges) + + special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False) + special_vocab.add_to_gguf(self.gguf_writer) + else: + raise 
NotImplementedError(f"Deepseek pre-tokenizer {tokpre!r} is not supported yet!") def set_gguf_parameters(self): @@ -5262,6 +6073,34 @@ def prepare_tensors(self): raise ValueError(f"Unprocessed experts: {experts}") +@ModelBase.register("Dots1ForCausalLM") +class Dots1Model(Qwen2MoeModel): + model_arch = gguf.MODEL_ARCH.DOTS1 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.hparams["num_experts"] = self.hparams["n_routed_experts"] + + def set_gguf_parameters(self): + super().set_gguf_parameters() + self.gguf_writer.add_leading_dense_block_count(self.hparams["first_k_dense_replace"]) + self.gguf_writer.add_expert_shared_count(self.hparams["n_shared_experts"]) + self.gguf_writer.add_expert_weights_scale(self.hparams["routed_scaling_factor"]) + self.gguf_writer.add_expert_weights_norm(self.hparams["norm_topk_prob"]) + + if self.hparams["scoring_func"] == "noaux_tc": + self.gguf_writer.add_expert_gating_func(gguf.ExpertGatingFuncType.SIGMOID) + else: + raise ValueError(f"Unsupported scoring_func value: {self.hparams['scoring_func']}") + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None): + if name.endswith("e_score_correction_bias"): + name = name.replace("e_score_correction_bias", "e_score_correction.bias") + if "shared_experts" in name: + return [(self.map_tensor_name(name), data_torch)] + return super().modify_tensors(data_torch, name, bid) + + @ModelBase.register("PLMForCausalLM") class PLMModel(TextModel): model_arch = gguf.MODEL_ARCH.PLM @@ -5390,9 +6229,6 @@ def set_vocab(self): special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) special_vocab.add_to_gguf(self.gguf_writer) - self.gguf_writer.add_add_bos_token(False) - self.gguf_writer.add_add_eos_token(True) - def set_gguf_parameters(self): if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None: logger.warning("Couldn't find context length in config.json, assuming default value of 512") @@ -5530,9 +6366,6 @@ def 
set_vocab(self): special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens)) special_vocab.add_to_gguf(self.gguf_writer) - self.gguf_writer.add_add_bos_token(False) - self.gguf_writer.add_add_eos_token(True) - def set_gguf_parameters(self): if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None: logger.warning("Couldn't find context length in config.json, assuming default value of 512") @@ -5920,7 +6753,8 @@ def generate_extra_tensors(self) -> Iterable[tuple[str, Tensor]]: if rope_scaling := self.find_hparam(["rope_scaling"], optional=True): if rope_scaling.get("rope_type", '').lower() == "llama3": base = self.hparams.get("rope_theta", 10000.0) - dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"]) + if (dim := self.hparams.get("head_dim")) is None: + dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"] freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim)) factor = rope_scaling.get("factor", 8.0) @@ -6010,18 +6844,148 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_EXP, bid), up), ] + has_experts = bool(self.hparams.get('num_local_experts')) + if name.endswith("shared_mlp.input_linear.weight"): ffn_dim = self.hparams["shared_intermediate_size"] assert data_torch.shape[-2] == 2 * ffn_dim, "Merged FFN tensor size must be 2 * shared_intermediate_size" gate, up = data_torch.split(ffn_dim, dim=-2) + if has_experts: + return [ + (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), gate), + (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), up), + ] + return [ + (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE, bid), gate), + (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP, bid), up), + ] + + if not has_experts and name.endswith("shared_mlp.output_linear.weight"): return [ - (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_GATE_SHEXP, bid), 
gate), - (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_UP_SHEXP, bid), up), + (self.format_tensor_name(gguf.MODEL_TENSOR.FFN_DOWN, bid), data_torch) ] return super().modify_tensors(data_torch, name, bid) +@ModelBase.register("GraniteMoeHybridForCausalLM", "BambaForCausalLM") +class GraniteHybridModel(Mamba2Model, GraniteMoeModel): + """GraniteHybrid is a hybrid SSM + Attention model that uses Mamba2 SSM + layers and optionally uses MoE w/ a shared expert""" + model_arch = gguf.MODEL_ARCH.GRANITE_HYBRID + undo_permute = True + + def __init__(self, *args, **kwargs): + + # Hybrid mamba models use a prefix for the mamba-specific params. + # TODO: Extend this if the prefix(es) need to be configurable + self.hparam_prefixes = ["mamba"] + + super().__init__(*args, **kwargs) + + # Lists of which layers use ssm vs attention + self._attn_layers = self.get_attn_layers() + self._ssm_layers = [ + i for i in range(self.block_count) + if i not in self._attn_layers + ] + + # n_group and d_inner are used during reshape_tensors for mamba2 + self.d_model = self.find_hparam(["hidden_size", "d_model"]) + self.n_group = self.find_hparam(["n_groups"]) + self.d_inner = self.find_hparam(["expand"]) * self.d_model + + def get_attn_layers(self): + # Explicit list of layer type names + if layer_types := self.hparams.get("layer_types"): + return [ + i for i, typ in enumerate(layer_types) + if typ == "attention" + ] + + # Layer types indicated by index or period + attn_layers = self.hparams.get("attn_layer_indices", []) + if not attn_layers: + attn_period = self.hparams.get("attn_layer_period") + assert attn_period, "Didn't find attn_layer_indices or attn_layer_period" + attn_offset = self.hparams.get("attn_layer_offset") + assert attn_offset is not None, "No attention layer offset set with attn_layer_period" + attn_layers = [ + i for i in range(self.block_count) + if i % attn_period == attn_offset + ] + return attn_layers + + def find_hparam(self, keys: Iterable[str], *args, **kwargs) -> Any: 
+ prefixed = [] + for pfx in self.hparam_prefixes: + prefixed.extend( + "_".join([pfx, k]) + for k in keys + ) + keys = list(keys) + prefixed + return Mamba2Model.find_hparam(self, keys, *args, **kwargs) + + def modify_tensors( + self, data_torch: Tensor, name: str, bid: int | None + ) -> Iterable[tuple[str, Tensor]]: + if ( + name.endswith("block_sparse_moe.input_linear.weight") + or "shared_mlp" in name + ): + return GraniteMoeModel.modify_tensors(self, data_torch, name, bid) + + # Determine whether this is a mamba layer or an attention layer + if bid in self._ssm_layers: + return Mamba2Model.modify_tensors(self, data_torch, name, bid) + elif bid in self._attn_layers: + return GraniteMoeModel.modify_tensors(self, data_torch, name, bid) + return [(self.map_tensor_name(name), data_torch)] + + def set_gguf_parameters(self): + """This method merges params from both parents and some that are + specific to this model. The result is some duplication of how the params + get set. The following warnings are expected during conversion: + + WARNING:Duplicated key name 'granitehybrid.attention.head_count_kv' + WARNING:Duplicated key name 'granitehybrid.context_length' + """ + GraniteMoeModel.set_gguf_parameters(self) + + ## Mamba mixer params ## + self.gguf_writer.add_ssm_conv_kernel(self.find_hparam(["conv_kernel", "d_conv"])) + self.gguf_writer.add_ssm_state_size(self.find_hparam(["state_size", "d_state"])) + self.gguf_writer.add_ssm_group_count(self.n_group) + self.gguf_writer.add_ssm_inner_size(self.d_inner) + # NOTE: The mamba_dt_rank is _not_ the right field for how this is used + # in llama.cpp + self.gguf_writer.add_ssm_time_step_rank(self.find_hparam(["n_heads"])) + + ## Attention params ## + head_count_kv = self.find_hparam(["num_key_value_heads", "n_head_kv"]) + head_count_kv_vec = [ + head_count_kv if i in self._attn_layers else 0 for i in range(self.block_count) + ] + if rope_dim := self.hparams.get("attn_rotary_emb"): + 
self.gguf_writer.add_rope_dimension_count(rope_dim) + self.gguf_writer.add_head_count_kv(head_count_kv_vec) + + ## If Bamba, use rope, otherwise don't + use_rope = "BambaForCausalLM" in self.hparams["architectures"] + self.gguf_writer.add_rope_scaling_finetuned(use_rope) + if not use_rope: + self.gguf_writer.add_context_length(2**20) + + ## Validation ## + d_head = self.find_hparam(["d_head"], optional=True) or 64 + assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported" + assert self.d_inner % d_head == 0, f"SSM inner size {self.d_inner} not a multiple of head dim {d_head}" + + def set_vocab(self): + self.hparams["pad_vocab_size_multiple"] = 8 + Mamba2Model.set_vocab(self) + + @ModelBase.register("BailingMoeForCausalLM") class BailingMoeModel(TextModel): model_arch = gguf.MODEL_ARCH.BAILINGMOE @@ -6032,7 +6996,8 @@ def set_vocab(self): def set_gguf_parameters(self): super().set_gguf_parameters() hparams = self.hparams - rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"] + if (rope_dim := hparams.get("head_dim")) is None: + rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"] self.gguf_writer.add_rope_dimension_count(rope_dim) rope_scaling = self.hparams.get("rope_scaling") or {} @@ -6064,7 +7029,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter n_head = self.hparams["num_attention_heads"] n_kv_head = self.hparams.get("num_key_value_heads") n_embd = self.hparams["hidden_size"] - head_dim = self.hparams.get("head_dim") or n_embd // n_head + if (head_dim := self.hparams.get("head_dim")) is None: + head_dim = n_embd // n_head output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT) @@ -6228,6 +7194,321 @@ def set_gguf_parameters(self): super().set_gguf_parameters() self.gguf_writer.add_audio_stack_factor(self.global_config["stack_factor"]) + +@ModelBase.register("FalconH1ForCausalLM") +class FalconH1Model(Mamba2Model): + model_arch = 
gguf.MODEL_ARCH.FALCON_H1 + + def __init__(self, *args, **kwargs): + # Set the hparam prefixes for Falcon Mamba2 + self.hparam_prefixes = ["mamba"] + + # Initialize the base Mamba2Model + super().__init__(*args, **kwargs) + + # Use Llama conversion for attention + self._transformer_model_class = LlamaModel + + # n_group and d_inner are used during reshape_tensors for mamba2 + self.n_group = self.find_hparam(["n_groups"]) + self.d_inner = self.find_hparam(["mamba_d_ssm"]) + self.d_head = self.find_hparam(["d_head"]) + + # Initialize any Falcon Mamba2 specific attributes + self.has_attention = True # Falcon Mamba2 has attention components + + # Load Falcon-H1 multipliers from hyperparameters + self.attention_in_multiplier = self.find_hparam(["attention_in_multiplier"], optional=True) + self.attention_out_multiplier = self.find_hparam(["attention_out_multiplier"], optional=True) + self.ssm_in_multiplier = self.find_hparam(["ssm_in_multiplier"], optional=True) + self.ssm_out_multiplier = self.find_hparam(["ssm_out_multiplier"], optional=True) + self.mlp_multipliers = self.find_hparam(["mlp_multipliers"], optional=True) + self.ssm_multipliers = self.find_hparam(["ssm_multipliers"], optional=True) + self.intermediate_size = self.find_hparam(["intermediate_size"]) + self.key_multiplier = self.find_hparam(["key_multiplier"], optional=True) + + def find_hparam(self, keys: Iterable[str], *args, **kwargs) -> Any: + prefixed = [] + for pfx in self.hparam_prefixes: + prefixed.extend( + "_".join([pfx, k]) + for k in keys + ) + keys = list(keys) + prefixed + return super().find_hparam(keys, *args, **kwargs) + + def set_vocab(self): + self._set_vocab_gpt2() + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + tensors = list(super().modify_tensors(data_torch, name, bid)) + tensor = tensors[0][1] + + if "down_proj" in name: + tensor = tensor * self.mlp_multipliers[1] + elif "gate_proj" in name: + tensor = tensor * 
self.mlp_multipliers[0] + elif "k_proj" in name: + tensor = tensor * self.key_multiplier * self.attention_in_multiplier + elif "q_proj" in name: + tensor = tensor * self.attention_in_multiplier + elif "v_proj" in name: + tensor = tensor * self.attention_in_multiplier + elif "o_proj" in name: + tensor = tensor * self.attention_out_multiplier + elif "out_proj" in name: + tensor = tensor * self.ssm_out_multiplier + elif "in_proj" in name: + tensor = tensor * self.ssm_in_multiplier + zxbcdt_multipliers = self.hparams["ssm_multipliers"] + intermediate_size = self.hparams["mamba_d_ssm"] + groups_time_state_size = self.hparams["mamba_n_groups"] * self.hparams["mamba_d_state"] + tensor[:intermediate_size, :] *= zxbcdt_multipliers[0] + tensor[intermediate_size:2 * intermediate_size, :] *= zxbcdt_multipliers[1] + tensor[2 * intermediate_size:2 * intermediate_size + groups_time_state_size, :] *= zxbcdt_multipliers[2] + tensor[2 * intermediate_size + groups_time_state_size:2 * intermediate_size + 2 * groups_time_state_size, :] *= zxbcdt_multipliers[3] + tensor[2 * intermediate_size + 2 * groups_time_state_size:, :] *= zxbcdt_multipliers[4] + elif "lm_head" in name: + tensor = tensor * self.hparams["lm_head_multiplier"] + elif "embed_tokens" in name: + tensor = tensor * self.hparams["embedding_multiplier"] + elif "mamba.norm" in name: + tensor = tensor.reshape(self.n_group, self.d_inner // self.n_group) + + tensors = [(tensors[0][0], tensor)] + return tensors + + def set_gguf_parameters(self): + super().set_gguf_parameters() + + ## General Params ## + self.gguf_writer.add_vocab_size(self.hparams["vocab_size"]) + # Override some Mamba2 defaults + self.gguf_writer.add_block_count(self.block_count) + self.gguf_writer.add_context_length(self.hparams.get("max_position_embeddings", 0)) + self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"]) + + ## Attention params ## + self.gguf_writer.add_head_count(self.hparams["num_attention_heads"]) # Override value 0 from 
Mamba2 + self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"]) + self.gguf_writer.add_key_length(self.hparams["head_dim"]) + self.gguf_writer.add_value_length(self.hparams["head_dim"]) + + ## Validation ## + assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported" + assert self.d_inner % self.d_head == 0, f"SSM inner size {self.d_inner} not a multiple of head dim {self.d_head}" + + # Add any other Falcon Mamba2 specific configuration + self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"])) + + +@ModelBase.register("HunYuanMoEV1ForCausalLM") +class HunYuanMoEModel(TextModel): + model_arch = gguf.MODEL_ARCH.HUNYUAN_MOE + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + # For handling tied embeddings + self._tok_embd = None + + def set_vocab(self): + from transformers import AutoTokenizer + tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True) + + # 1. Get the pre-tokenizer identifier hash + tokpre = self.get_vocab_base_pre(tokenizer) + + # 2. Reverse-engineer the merges list from mergeable_ranks + merges = [] + vocab = {} + mergeable_ranks = tokenizer.mergeable_ranks + for token, rank in mergeable_ranks.items(): + vocab[QwenModel.token_bytes_to_string(token)] = rank + if len(token) == 1: + continue + merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank) + if len(merged) == 2: # todo this is an assert in Qwen, why? + merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged))) + + # 3. 
Generate the tokens and toktypes lists + vocab_size = self.hparams["vocab_size"] + assert tokenizer.vocab_size == vocab_size + special_tokens = tokenizer.special_tokens + reverse_vocab = {id_ : encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()} + tokens: list[str] = [] + toktypes: list[int] = [] + for i in range(vocab_size): + if i not in reverse_vocab: + tokens.append(f"[PAD{i}]") + toktypes.append(gguf.TokenType.UNUSED) + else: + token = reverse_vocab[i] + tokens.append(token) + if i in special_tokens.values(): + toktypes.append(gguf.TokenType.CONTROL) + else: + toktypes.append(gguf.TokenType.NORMAL) + + # 4. Write all vocab-related fields to the GGUF writer + self.gguf_writer.add_tokenizer_model("gpt2") + self.gguf_writer.add_tokenizer_pre(tokpre) + self.gguf_writer.add_token_list(tokens) + self.gguf_writer.add_token_types(toktypes) + self.gguf_writer.add_token_merges(merges) + + # 5. Add special tokens and chat templates + special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False) + special_vocab.add_to_gguf(self.gguf_writer) + # FIX for BOS token: Overwrite incorrect id read from config.json + self.gguf_writer.add_bos_token_id(127959) # <|bos|> + + def set_gguf_parameters(self): + super().set_gguf_parameters() + hparams = self.hparams + + self.gguf_writer.add_expert_count(hparams["num_experts"]) + self.gguf_writer.add_expert_shared_feed_forward_length(hparams["intermediate_size"]) + + moe_intermediate_size = hparams["moe_intermediate_size"] + assert all(n == moe_intermediate_size[0] for n in moe_intermediate_size) + self.gguf_writer.add_expert_feed_forward_length(moe_intermediate_size[0]) + + moe_topk = hparams["moe_topk"] + assert all(topk == moe_topk[0] for topk in moe_topk) + self.gguf_writer.add_expert_used_count(moe_topk[0]) + + moe_shared_expert = hparams["num_shared_expert"] + assert all(n == moe_shared_expert[0] for n in moe_shared_expert) + self.gguf_writer.add_expert_shared_count(moe_shared_expert[0]) + + # Rope + 
rope_scaling = hparams.get("rope_scaling", {}) + if rope_scaling.get("type") == "dynamic": + # HunYuan uses NTK Aware Alpha based scaling. Original implementation: https://www.reddit.com/r/LocalLLaMA/comments/14lz7j5/ntkaware_scaled_rope_allows_llama_models_to_have/ + # 1000 corresponds to a usable context length of 256k (https://github.com/Tencent-Hunyuan/Hunyuan-A13B/blob/main/report/Hunyuan_A13B_Technical_Report.pdf) + alpha = rope_scaling.get("alpha", 1000) + base = hparams.get("rope_theta", 10000.0) + dim = (hparams["hidden_size"] // hparams["num_attention_heads"]) # 128 + scaled_base = base * (alpha ** (dim / (dim - 2))) # 10000 * (1000 ** (128 / 126)) = 11158839.9251 + self.gguf_writer.add_rope_freq_base(scaled_base) + self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE) + self.gguf_writer.add_rope_scaling_factor(1) + # There is no consistent way to calculate ctx from alpha, and the config is incorrectly set to 32k + self.gguf_writer.add_rope_scaling_orig_ctx_len(256 * 1024) # 256k context length + self.gguf_writer.add_context_length(256 * 1024) # 256k context length + + # if any of our assumptions about the values are wrong, something has changed and this may need to be updated + assert alpha == 1000 and base == 10000.0 and dim == 128 and self.hparams["max_position_embeddings"] in [32 * 1024, 256 * 1024] , \ + "HunYuan dynamic RoPE scaling assumptions changed, please update the logic or context length manually" + + _experts: list[dict[str, Tensor]] | None = None + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + if name == "model.embed_tokens.weight": + self._tok_embd = data_torch.clone() + + if name == "lm_head.weight": + if self.hparams.get("tie_word_embeddings", False): + logger.info("Skipping tied output layer 'lm_head.weight'") + return [] + + if name.find("mlp.experts") != -1: + n_experts = self.hparams["num_experts"] + assert bid is not None + + if self._experts is None: + 
self._experts = [{} for _ in range(self.block_count)] + + self._experts[bid][name] = data_torch + + if len(self._experts[bid]) >= n_experts * 3: + # merge the experts into a single 3d tensor + tensors: list[tuple[str, Tensor]] = [] + for w_name in ["down_proj", "gate_proj", "up_proj"]: + datas: list[Tensor] = [] + + for xid in range(n_experts): + ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight" + datas.append(self._experts[bid][ename]) + del self._experts[bid][ename] + + data_torch = torch.stack(datas, dim=0) + merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight" + new_name = self.map_tensor_name(merged_name) + tensors.append((new_name, data_torch)) + + return tensors + else: + return [] + + return [(self.map_tensor_name(name), data_torch)] + + def prepare_tensors(self): + super().prepare_tensors() + if self._experts is not None: + experts = [k for d in self._experts for k in d.keys()] + if len(experts) > 0: + raise ValueError(f"Unprocessed experts: {experts}") + + +@ModelBase.register("SmolLM3ForCausalLM") +class SmolLM3Model(LlamaModel): + model_arch = gguf.MODEL_ARCH.SMOLLM3 + + def set_vocab(self): + super().set_vocab() + # remove unsupported array slicing in chat template + # ref: https://huggingface.co/ggml-org/SmolLM3-3B-GGUF/discussions/1 + from transformers import AutoTokenizer + tokenizer = AutoTokenizer.from_pretrained(self.dir_model) + if tokenizer.chat_template is not None: + chat_template = tokenizer.chat_template.replace("[:]", "") + self.gguf_writer.add_chat_template(chat_template) + + +@ModelBase.register("Lfm2ForCausalLM") +@ModelBase.register("LFM2ForCausalLM") +class LFM2Model(TextModel): + model_arch = gguf.MODEL_ARCH.LFM2 + + def _add_feed_forward_length(self): + ff_dim = self.hparams["block_ff_dim"] + + auto_adjust_ff_dim = self.hparams["block_auto_adjust_ff_dim"] + ff_dim = self.hparams["block_ff_dim"] + ffn_dim_multiplier = self.hparams["block_ffn_dim_multiplier"] + multiple_of = self.hparams["block_multiple_of"] + 
+ if auto_adjust_ff_dim: + ff_dim = int(2 * ff_dim / 3) + # custom dim factor multiplier + if ffn_dim_multiplier is not None: + ff_dim = int(ffn_dim_multiplier * ff_dim) + ff_dim = multiple_of * ((ff_dim + multiple_of - 1) // multiple_of) + + self.gguf_writer.add_feed_forward_length(ff_dim) + + def set_gguf_parameters(self): + # set num_key_value_heads only for attention layers + self.hparams["num_key_value_heads"] = [ + self.hparams["num_key_value_heads"] if layer_type == "full_attention" else 0 + for layer_type in self.hparams["layer_types"] + ] + + super().set_gguf_parameters() + self.gguf_writer.add_vocab_size(self.hparams["vocab_size"]) + self.gguf_writer.add_shortconv_l_cache(self.hparams["conv_L_cache"]) + self.gguf_writer.add_layer_norm_rms_eps(self.hparams["norm_eps"]) + self._add_feed_forward_length() + + def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: + # conv op requires 2d tensor + if 'conv.conv' in name: + data_torch = data_torch.squeeze(1) + + return [(self.map_tensor_name(name), data_torch)] + + ###### CONVERSION LOGIC ###### @@ -6325,8 +7606,8 @@ def parse_args() -> argparse.Namespace: help="model is executed on big endian machine", ) parser.add_argument( - "model", type=Path, - help="directory containing model file", + "model", type=str, + help="directory containing model file or huggingface repository ID (if --remote)", nargs="?", ) parser.add_argument( @@ -6407,12 +7688,20 @@ def get_model_architecture(hparams: dict[str, Any], model_type: ModelType) -> st # maybe we should fallback to text model's arch in that case, since not many models have both text_config = hparams.get("text_config", {}) vision_config = hparams.get("vision_config", {}) - arch = hparams["architectures"][0] + arch = None + if (arches := hparams.get("architectures")) is not None and len(arches) > 0: + arch = arches[0] + elif "ssm_cfg" in hparams: + # For non-hf Mamba and Mamba2 models + arch = 
hparams["ssm_cfg"].get("layer", "Mamba") + "ForCausalLM" + # if "architectures" is found in the sub-config, use that instead if model_type == ModelType.TEXT and text_config.get("architectures") is not None: arch = text_config["architectures"][0] elif model_type == ModelType.MMPROJ and vision_config.get("architectures") is not None: arch = vision_config["architectures"][0] + if arch is None: + raise ValueError("Failed to detect model architecture") return arch @@ -6429,18 +7718,20 @@ def main() -> None: else: logging.basicConfig(level=logging.INFO) - dir_model = args.model - if args.remote: + hf_repo_id = args.model from huggingface_hub import snapshot_download local_dir = snapshot_download( - repo_id=str(dir_model), + repo_id=hf_repo_id, allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"]) dir_model = Path(local_dir) logger.info(f"Downloaded config and tokenizer to {local_dir}") + else: + hf_repo_id = None + dir_model = Path(args.model) if not dir_model.is_dir(): - logger.error(f'Error: {args.model} is not a directory') + logger.error(f'Error: {dir_model} is not a directory') sys.exit(1) ftype_map: dict[str, gguf.LlamaFileType] = { @@ -6460,9 +7751,9 @@ def main() -> None: if args.outfile is not None: fname_out = args.outfile - elif args.remote: + elif hf_repo_id: # if remote, use the model ID as the output file name - fname_out = Path("./" + str(args.model).replace("/", "-") + "-{ftype}.gguf") + fname_out = Path("./" + hf_repo_id.replace("/", "-") + "-{ftype}.gguf") else: fname_out = dir_model @@ -6491,7 +7782,7 @@ def main() -> None: split_max_tensors=args.split_max_tensors, split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run, small_first_shard=args.no_tensor_first_split, - remote_hf_model_id=str(args.model) if args.remote else None) + remote_hf_model_id=hf_repo_id) if args.vocab_only: logger.info("Exporting model vocab...") diff --git a/convert_hf_to_gguf_update.py b/convert_hf_to_gguf_update.py index 
2f733f0973686..f7b6d97b19c8b 100755 --- a/convert_hf_to_gguf_update.py +++ b/convert_hf_to_gguf_update.py @@ -7,7 +7,6 @@ import re import requests -import sys import json import shutil import argparse @@ -69,8 +68,7 @@ class TOKENIZER_TYPE(IntEnum): hf_token = args.hf_token if args.hf_token is not None else hf_token if hf_token is None: - logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token") - sys.exit(1) + logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token") # TODO: this string has to exercise as much pre-tokenizer functionality as possible # will be updated with time - contributions welcome @@ -128,6 +126,9 @@ class TOKENIZER_TYPE(IntEnum): {"name": "llama4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct", }, {"name": "pixtral", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistral-community/pixtral-12b", }, {"name": "seed-coder", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/ByteDance-Seed/Seed-Coder-8B-Base", }, + {"name": "a.x-4.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/skt/A.X-4.0", }, + {"name": "midm-2.0", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct", }, + {"name": "lfm2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LiquidAI/LFM2-Tokenizer"}, ] # some models are known to be broken upstream, so we will skip them as exceptions @@ -137,11 +138,18 @@ class TOKENIZER_TYPE(IntEnum): {"name": "chatglm-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-chat", "chkhsh": "81d72c7348a9f0ebe86f23298d37debe0a5e71149e29bd283904c02262b27516"}, {"name": "glm4", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/THUDM/glm-4-9b-hf", "chkhsh": "a1336059768a55c99a734006ffb02203cd450fed003e9a71886c88acf24fdbc2"}, {"name": "minerva-7b", "tokt": 
TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/sapienzanlp/Minerva-7B-base-v1.0", "chkhsh": "1431a23e583c97432bc230bff598d103ddb5a1f89960c8f1d1051aaa944d0b35"}, + {"name": "hunyuan", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tencent/Hunyuan-A13B-Instruct", "chkhsh": "7e57df22b1fe23a7b1e1c7f3dc4e3f96d43a4eb0836d0c6bdc3436d7b2f1c664"}, + # falcon-h1 series uses 4 different tokenizers across model sizes (0.5b - 34b), hence we need to define 4 different hashes + {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-0.5B-Base", "chkhsh": "a6b57017d60e6edb4d88ecc2845188e0eb333a70357e45dcc9b53964a73bbae6"}, + {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-1B-Base", "chkhsh": "60476e1243776c4fb1b993dbd7a5f15ac22f83c80afdf425fa5ae01c8d44ef86"}, + {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-7B-Base", "chkhsh": "3eda48b4c4dc7de733d1a8b3e3b4a85243dbbf704da2ee9d42c6beced8897896"}, + {"name": "falcon-h1", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/tiiuae/Falcon-H1-34B-Base", "chkhsh": "48f8e02c0359c0bbdd82f26909171fac1c18a457bb47573ed1fe3bbb2c1cfd4b"}, + {"name": "kimi-k2", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/moonshotai/Kimi-K2-Base", "chkhsh": "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890"}, ] def download_file_with_auth(url, token, save_path): - headers = {"Authorization": f"Bearer {token}"} + headers = {"Authorization": f"Bearer {token}"} if token else None response = sess.get(url, headers=headers) response.raise_for_status() os.makedirs(os.path.dirname(save_path), exist_ok=True) @@ -222,7 +230,7 @@ def get_existing_models(convert_py): # generate the source code for the convert_hf_to_gguf.py:get_vocab_base_pre() function: src_ifs = "" -for model in [*all_models, *pre_computed_hashes]: +for model in [*pre_computed_hashes, *all_models]: name = 
model["name"] tokt = model["tokt"] chkhsh = model.get("chkhsh") @@ -230,11 +238,6 @@ def get_existing_models(convert_py): if tokt == TOKENIZER_TYPE.SPM or tokt == TOKENIZER_TYPE.UGM: continue - # Skip if the tokenizer folder does not exist or there are other download issues previously - if not os.path.exists(f"models/tokenizers/{name}"): - logger.warning(f"Directory for tokenizer {name} not found. Skipping...") - continue - # create the tokenizer if chkhsh is not None: # if the model has a pre-computed hash, use it @@ -244,15 +247,19 @@ def get_existing_models(convert_py): chkhsh = existing_models[name] else: # otherwise, compute the hash of the tokenizer + + # Fail if the tokenizer folder with config does not exist or there are other download issues previously + if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"): + raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.") + try: logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...") if name == "t5": tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False) else: tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}") - except OSError as e: - logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. 
Error: {e}") - continue # Skip to the next model if the tokenizer can't be loaded + except Exception as e: + raise OSError(f"Error loading tokenizer for model {name}.") from e chktok = tokenizer.encode(CHK_TXT) chkhsh = sha256(str(chktok).encode()).hexdigest() diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 249e73451e66b..6e9b88935da97 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -757,7 +757,7 @@ use 1 SYCL GPUs: [0] with Max compute units:512 | Name | Value | Function | |-------------------|------------------|---------------------------------------------------------------------------------------------------------------------------| | GGML_SYCL_DEBUG | 0 (default) or 1 | Enable log function by macro: GGML_SYCL_DEBUG | -| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features based on Intel GPU type, to compare the performance increase | +| GGML_SYCL_DISABLE_OPT | 0 (default) or 1 | Disable optimize features for Intel GPUs. (Recommended to 1 for intel devices older than Gen 10) | | GGML_SYCL_DISABLE_GRAPH | 0 or 1 (default) | Disable running computations through SYCL Graphs feature. Disabled by default because graph performance isn't yet better than non-graph performance. | | GGML_SYCL_DISABLE_DNN | 0 (default) or 1 | Disable running computations through oneDNN and always use oneMKL. | | ZES_ENABLE_SYSMAN | 0 (default) or 1 | Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory.
Recommended to use when --split-mode = layer | diff --git a/docs/build-s390x.md b/docs/build-s390x.md new file mode 100644 index 0000000000000..4c9ebb271cee2 --- /dev/null +++ b/docs/build-s390x.md @@ -0,0 +1,246 @@ +> [!IMPORTANT] +> This build documentation is specific only to IBM Z & LinuxONE mainframes (s390x). You can find the build documentation for other architectures: [build.md](build.md). + +# Build llama.cpp locally (for s390x) + +The main product of this project is the `llama` library. Its C-style interface can be found in [include/llama.h](../include/llama.h). + +The project also includes many example programs and tools using the `llama` library. The examples range from simple, minimal code snippets to sophisticated sub-projects such as an OpenAI-compatible HTTP server. + +**To get the code:** + +```bash +git clone https://github.com/ggml-org/llama.cpp +cd llama.cpp +``` + +## CPU Build with BLAS + +Building llama.cpp with BLAS support is highly recommended as it has shown to provide performance improvements. Make sure to have OpenBLAS installed in your environment. + +```bash +cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_BLAS=ON \ + -DGGML_BLAS_VENDOR=OpenBLAS + +cmake --build build --config Release -j $(nproc) +``` + +**Notes**: + +- For faster repeated compilation, install [ccache](https://ccache.dev/) +- By default, VXE/VXE2 is enabled. To disable it (not recommended): + + ```bash + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_BLAS=ON \ + -DGGML_BLAS_VENDOR=OpenBLAS \ + -DGGML_VXE=OFF + + cmake --build build --config Release -j $(nproc) + ``` + +- By default, NNPA is enabled when available. To disable it (not recommended): + + ```bash + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_BLAS=ON \ + -DGGML_BLAS_VENDOR=OpenBLAS \ + -DGGML_NNPA=OFF + + cmake --build build --config Release -j $(nproc) + ``` + +- For debug builds: + + ```bash + cmake -S . 
-B build \ + -DCMAKE_BUILD_TYPE=Debug \ + -DGGML_BLAS=ON \ + -DGGML_BLAS_VENDOR=OpenBLAS + cmake --build build --config Debug -j $(nproc) + ``` + +- For static builds, add `-DBUILD_SHARED_LIBS=OFF`: + + ```bash + cmake -S . -B build \ + -DCMAKE_BUILD_TYPE=Release \ + -DGGML_BLAS=ON \ + -DGGML_BLAS_VENDOR=OpenBLAS \ + -DBUILD_SHARED_LIBS=OFF + + cmake --build build --config Release -j $(nproc) + ``` + +## Getting GGUF Models + +All models need to be converted to Big-Endian. You can achieve this in one of three ways: + +1. **Use pre-converted models verified for use on IBM Z & LinuxONE (easiest)** + + ![File Type - gguf](https://img.shields.io/badge/File_Type-gguf-fff) + + You can find popular models pre-converted and verified at [s390x Ready Models](https://huggingface.co/collections/taronaeo/s390x-ready-models-672765393af438d0ccb72a08). + + These models have already been converted from `safetensors` to `GGUF Big-Endian` and their respective tokenizers verified to run correctly on IBM z15 and later systems. + +2. **Convert safetensors model to GGUF Big-Endian directly (recommended)** + + ![File Type - safetensors](https://img.shields.io/badge/File_Type-safetensors-da1e28) + + The model you are trying to convert must be in `safetensors` file format (for example [IBM Granite 3.3 2B](https://huggingface.co/ibm-granite/granite-3.3-2b-instruct)). Make sure you have downloaded the model repository for this case. + + ```bash + python3 convert_hf_to_gguf.py \ + --outfile model-name-be.f16.gguf \ + --outtype f16 \ + --bigendian \ + model-directory/ + ``` + + For example, + + ```bash + python3 convert_hf_to_gguf.py \ + --outfile granite-3.3-2b-instruct-be.f16.gguf \ + --outtype f16 \ + --bigendian \ + granite-3.3-2b-instruct/ + ``` + +3.
**Convert existing GGUF Little-Endian model to Big-Endian** + + ![File Type - gguf](https://img.shields.io/badge/File_Type-gguf-fff) + + The model you are trying to convert must be in `gguf` file format (for example [IBM Granite 3.3 2B](https://huggingface.co/ibm-granite/granite-3.3-2b-instruct-GGUF)). Make sure you have downloaded the model file for this case. + + ```bash + python3 gguf-py/gguf/scripts/gguf_convert_endian.py model-name.f16.gguf BIG + ``` + + For example, + + ```bash + python3 gguf-py/gguf/scripts/gguf_convert_endian.py granite-3.3-2b-instruct-le.f16.gguf BIG + mv granite-3.3-2b-instruct-le.f16.gguf granite-3.3-2b-instruct-be.f16.gguf + ``` + + **Notes:** + + - The GGUF endian conversion script may not support all data types at the moment and may fail for some models/quantizations. When that happens, please try manually converting the safetensors model to GGUF Big-Endian via Step 2. + +## IBM Accelerators + +### 1. SIMD Acceleration + +Only available on IBM z15 or later systems with the `-DGGML_VXE=ON` (turned on by default) compile flag. No hardware acceleration is possible with llama.cpp on older systems, such as IBM z14/arch12. On such systems, the APIs can still run but will use a scalar implementation. + +### 2. NNPA Vector Intrinsics Acceleration + +Only available on IBM z16 or later systems with the `-DGGML_NNPA=ON` (turned on when available) compile flag. No hardware acceleration is possible with llama.cpp on older systems, such as IBM z15/arch13. On such systems, the APIs can still run but will use a scalar implementation. + +### 3. zDNN Accelerator + +_Only available on IBM z16 or later systems. No direction at the moment._ + +### 4. Spyre Accelerator + +_No direction at the moment._ + +## Performance Tuning + +### 1. Virtualization Setup + +It is strongly recommended to use only LPAR (Type-1) virtualization to get the most performance.
+ +Note: Type-2 virtualization is not supported at the moment. While you can get it running, the performance will not be the best. + +### 2. IFL (Core) Count + +It is recommended to allocate a minimum of 8 shared IFLs assigned to the LPAR. Increasing the IFL count past 8 shared IFLs will only improve Prompt Processing performance but not Token Generation. + +Note: IFL count does not equate to vCPU count. + +### 3. SMT vs NOSMT (Simultaneous Multithreading) + +It is strongly recommended to disable SMT via the kernel boot parameters as it negatively affects performance. Please refer to your Linux distribution's guide on disabling SMT via kernel boot parameters. + +### 4. BLAS vs NOBLAS + +IBM VXE/VXE2 SIMD acceleration depends on the BLAS implementation. It is strongly recommended to use BLAS. + +## Frequently Asked Questions (FAQ) + +1. I'm getting the following error message while trying to load a model: `gguf_init_from_file_impl: failed to load model: this GGUF file version 50331648 is extremely large, is there a mismatch between the host and model endianness?` + + Answer: Please ensure that the model you have downloaded/converted is GGUFv3 Big-Endian. These models are usually denoted with the `-be` suffix, e.g., `granite-3.3-2b-instruct-be.F16.gguf`. + + You may refer to the [Getting GGUF Models](#getting-gguf-models) section to manually convert a `safetensors` model to `GGUF` Big Endian. + +2. I'm getting extremely poor performance when running inference on a model + + Answer: Please refer to the [Appendix B: SIMD Support Matrix](#appendix-b-simd-support-matrix) to check if your model quantization is supported by SIMD acceleration. + +3. I'm building on IBM z17 and getting the following error messages: `invalid switch -march=z17` + + Answer: Please ensure that your GCC compiler is at least version 15.1.0 and that `binutils` is updated to the latest version. If this does not fix the problem, kindly open an issue. + +## Getting Help on IBM Z & LinuxONE + +1.
**Bugs, Feature Requests** + + Please file an issue in llama.cpp and ensure that the title contains "s390x". + +2. **Other Questions** + + Please reach out directly to [aionz@us.ibm.com](mailto:aionz@us.ibm.com). + +## Appendix A: Hardware Support Matrix + +| | Support | Minimum Compiler Version | +| ------- | ------- | ------------------------ | +| IBM z15 | ✅ | | +| IBM z16 | ✅ | | +| IBM z17 | ✅ | GCC 15.1.0 | + +- ✅ - supported and verified to run as intended +- 🚫 - unsupported, we are unlikely able to provide support + +## Appendix B: SIMD Support Matrix + +| | VX/VXE/VXE2 | NNPA | zDNN | Spyre | +| ---------- | ----------- | ---- | ---- | ----- | +| FP32 | ✅ | ✅ | ❓ | ❓ | +| FP16 | ✅ | ✅ | ❓ | ❓ | +| BF16 | 🚫 | 🚫 | ❓ | ❓ | +| Q4_0 | ✅ | ✅ | ❓ | ❓ | +| Q4_1 | ✅ | ✅ | ❓ | ❓ | +| Q5_0 | 🚫 | 🚫 | ❓ | ❓ | +| Q5_1 | 🚫 | 🚫 | ❓ | ❓ | +| Q8_0 | ✅ | ✅ | ❓ | ❓ | +| Q2_K | 🚫 | 🚫 | ❓ | ❓ | +| Q3_K | ✅ | ✅ | ❓ | ❓ | +| Q4_K | ✅ | ✅ | ❓ | ❓ | +| Q5_K | ✅ | ✅ | ❓ | ❓ | +| Q6_K | ✅ | ✅ | ❓ | ❓ | +| TQ1_0 | 🚫 | 🚫 | ❓ | ❓ | +| TQ2_0 | 🚫 | 🚫 | ❓ | ❓ | +| IQ2_XXS | 🚫 | 🚫 | ❓ | ❓ | +| IQ2_XS | 🚫 | 🚫 | ❓ | ❓ | +| IQ2_S | 🚫 | 🚫 | ❓ | ❓ | +| IQ3_XXS | 🚫 | 🚫 | ❓ | ❓ | +| IQ3_S | 🚫 | 🚫 | ❓ | ❓ | +| IQ1_S | 🚫 | 🚫 | ❓ | ❓ | +| IQ1_M | 🚫 | 🚫 | ❓ | ❓ | +| IQ4_NL | ✅ | ✅ | ❓ | ❓ | +| IQ4_XS | ✅ | ✅ | ❓ | ❓ | +| FP32->FP16 | 🚫 | ✅ | ❓ | ❓ | +| FP16->FP32 | 🚫 | ✅ | ❓ | ❓ | + +- ✅ - acceleration available +- 🚫 - acceleration unavailable, will still run using scalar implementation +- ❓ - acceleration unknown, please contribute if you can test it yourself diff --git a/docs/build.md b/docs/build.md index 680b0d8398741..70767ad91c056 100644 --- a/docs/build.md +++ b/docs/build.md @@ -1,6 +1,6 @@ # Build llama.cpp locally -The main product of this project is the `llama` library. Its C-style interface can be found in [include/llama.h](include/llama.h). +The main product of this project is the `llama` library. Its C-style interface can be found in [include/llama.h](../include/llama.h). 
The project also includes many example programs and tools using the `llama` library. The examples range from simple, minimal code snippets to sophisticated sub-projects such as an OpenAI-compatible HTTP server. @@ -557,6 +557,27 @@ ninja To read documentation for how to build on Android, [click here](./android.md) +## WebGPU [In Progress] + +The WebGPU backend relies on [Dawn](https://dawn.googlesource.com/dawn). Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/docs/quickstart-cmake.md) to install Dawn locally so that llama.cpp can find it using CMake. The current implementation is up-to-date with Dawn commit `bed1a61`. + +In the llama.cpp directory, build with CMake: + +``` +cmake -B build -DGGML_WEBGPU=ON +cmake --build build --config Release +``` + +### Browser Support + +WebGPU allows cross-platform access to the GPU from supported browsers. We utilize [Emscripten](https://emscripten.org/) to compile ggml's WebGPU backend to WebAssembly. Emscripten does not officially support WebGPU bindings yet, but Dawn currently maintains its own WebGPU bindings called emdawnwebgpu. + +Follow the instructions [here](https://dawn.googlesource.com/dawn/+/refs/heads/main/src/emdawnwebgpu/) to download or build the emdawnwebgpu package (Note that it might be safer to build the emdawnwebgpu package locally, so that it stays in sync with the version of Dawn you have installed above). When building using CMake, the path to the emdawnwebgpu port file needs to be set with the flag `EMDAWNWEBGPU_DIR`. + +## IBM Z & LinuxONE + +To read documentation for how to build on IBM Z & LinuxONE, [click here](./build-s390x.md) + ## Notes about GPU-accelerated backends The GPU may still be used to accelerate some parts of the computation even when using the `-ngl 0` option. You can fully disable GPU acceleration by using `--device none`.
diff --git a/docs/development/HOWTO-add-model.md b/docs/development/HOWTO-add-model.md index 7f71e0247ddc7..51e0b0b20f58d 100644 --- a/docs/development/HOWTO-add-model.md +++ b/docs/development/HOWTO-add-model.md @@ -83,20 +83,22 @@ NOTE: Tensor names must end with `.weight` or `.bias` suffixes, that is the conv ### 2. Define the model architecture in `llama.cpp` -The model params and tensors layout must be defined in `llama.cpp`: -1. Define a new `llm_arch` -2. Define the tensors layout in `LLM_TENSOR_NAMES` -3. Add any non-standard metadata in `llm_load_hparams` -4. Create the tensors for inference in `llm_load_tensors` -5. If the model has a RoPE operation, add the rope type in `llama_rope_type` +The model params and tensors layout must be defined in `llama.cpp` source files: +1. Define a new `llm_arch` enum value in `src/llama-arch.h`. +2. In `src/llama-arch.cpp`: + - Add the architecture name to the `LLM_ARCH_NAMES` map. + - Add the tensor mappings to the `LLM_TENSOR_NAMES` map. +3. Add any non-standard metadata loading in the `llama_model_loader` constructor in `src/llama-model-loader.cpp`. +4. If the model has a RoPE operation, add a case for the architecture in `llama_model_rope_type` function in `src/llama-model.cpp`. NOTE: The dimensions in `ggml` are typically in the reverse order of the `pytorch` dimensions. ### 3. Build the GGML graph implementation -This is the funniest part, you have to provide the inference graph implementation of the new model architecture in `llama_build_graph`. - -Have a look at existing implementations like `build_llama`, `build_dbrx` or `build_bert`. +This is the funniest part, you have to provide the inference graph implementation of the new model architecture in `src/llama-model.cpp`. +Create a new struct that inherits from `llm_graph_context` and implement the graph-building logic in its constructor. +Have a look at existing implementations like `llm_build_llama`, `llm_build_dbrx` or `llm_build_bert`. 
+Then, in the `llama_model::build_graph` method, add a case for your architecture to instantiate your new graph-building struct. Some `ggml` backends do not support all operations. Backend implementations can be added in a separate PR. diff --git a/docs/docker.md b/docs/docker.md index f8f0573c17239..cbb333ee32c50 100644 --- a/docs/docker.md +++ b/docs/docker.md @@ -25,6 +25,9 @@ Additionally, there the following images, similar to the above: - `ghcr.io/ggml-org/llama.cpp:full-intel`: Same as `full` but compiled with SYCL support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:light-intel`: Same as `light` but compiled with SYCL support. (platforms: `linux/amd64`) - `ghcr.io/ggml-org/llama.cpp:server-intel`: Same as `server` but compiled with SYCL support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:full-vulkan`: Same as `full` but compiled with Vulkan support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:light-vulkan`: Same as `light` but compiled with Vulkan support. (platforms: `linux/amd64`) +- `ghcr.io/ggml-org/llama.cpp:server-vulkan`: Same as `server` but compiled with Vulkan support. (platforms: `linux/amd64`) The GPU enabled images are not currently tested by CI beyond being built. They are not built with any variation from the ones in the Dockerfiles defined in [.devops/](../.devops/) and the GitHub Action defined in [.github/workflows/docker.yml](../.github/workflows/docker.yml). If you need different settings (for example, a different CUDA, ROCm or MUSA library, you'll need to build the images locally for now). 
diff --git a/docs/function-calling.md b/docs/function-calling.md index fd3db9bd16a92..37eacaf3100c1 100644 --- a/docs/function-calling.md +++ b/docs/function-calling.md @@ -11,7 +11,7 @@ Function calling is supported for all models (see https://github.com/ggml-org/ll - Llama 3.1 / 3.3 (including builtin tools support - tool names for `wolfram_alpha`, `web_search` / `brave_search`, `code_interpreter`), Llama 3.2 - Functionary v3.1 / v3.2 - Hermes 2/3, Qwen 2.5 - - Qwen 2.5 Coder (WIP: https://github.com/ggml-org/llama.cpp/pull/12034) + - Qwen 2.5 Coder - Mistral Nemo - Firefunction v2 - Command R7B diff --git a/docs/ops.md b/docs/ops.md new file mode 100644 index 0000000000000..f6a06e3b9000e --- /dev/null +++ b/docs/ops.md @@ -0,0 +1,95 @@ +# GGML Operations + +List of GGML operations and backend support status. + +Legend: +- ✅ Fully supported by this backend +- 🟡 Partially supported by this backend +- ❌ Not supported by this backend + +| Operation | BLAS | CPU | CUDA | Metal | +|-----------|------|------|------|------| +| ABS | ❌ | ✅ | 🟡 | ❌ | +| ACC | ❌ | ✅ | ✅ | ✅ | +| ADD | ❌ | ✅ | ✅ | 🟡 | +| ADD1 | ❌ | ✅ | ✅ | ❌ | +| ARANGE | ❌ | ✅ | ✅ | ✅ | +| ARGMAX | ❌ | ✅ | ✅ | ✅ | +| ARGSORT | ❌ | ✅ | ✅ | ✅ | +| CLAMP | ❌ | ✅ | ✅ | 🟡 | +| CONCAT | ❌ | ✅ | 🟡 | ✅ | +| CONT | ❌ | ✅ | 🟡 | ✅ | +| CONV_2D_DW | ❌ | ✅ | ✅ | ❌ | +| CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | +| CONV_TRANSPOSE_2D | ❌ | ✅ | ✅ | ❌ | +| COS | ❌ | ✅ | ✅ | 🟡 | +| COUNT_EQUAL | ❌ | ✅ | ✅ | ❌ | +| CPY | ❌ | 🟡 | 🟡 | 🟡 | +| CROSS_ENTROPY_LOSS | ❌ | ✅ | ✅ | ❌ | +| CROSS_ENTROPY_LOSS_BACK | ❌ | ✅ | ✅ | ❌ | +| DIAG_MASK_INF | ❌ | ✅ | ✅ | 🟡 | +| DIV | ❌ | ✅ | ✅ | 🟡 | +| DUP | ❌ | ✅ | 🟡 | 🟡 | +| ELU | ❌ | ✅ | ❌ | 🟡 | +| EXP | ❌ | ✅ | 🟡 | ❌ | +| FLASH_ATTN_EXT | ❌ | ✅ | 🟡 | 🟡 | +| GATED_LINEAR_ATTN | ❌ | ✅ | ✅ | ❌ | +| GEGLU | ❌ | ✅ | ✅ | 🟡 | +| GEGLU_ERF | ❌ | ✅ | ✅ | 🟡 | +| GEGLU_QUICK | ❌ | ✅ | ✅ | 🟡 | +| GELU | ❌ | ✅ | 🟡 | 🟡 | +| GELU_ERF | ❌ | ✅ | 🟡 | 🟡 | +| GELU_QUICK | ❌ | ✅ | 🟡 | 🟡 | +| GET_ROWS | ❌ | ✅ | 🟡 | ✅ | 
+| GET_ROWS_BACK | ❌ | 🟡 | 🟡 | ❌ | +| GROUP_NORM | ❌ | ✅ | ✅ | ✅ | +| HARDSIGMOID | ❌ | ✅ | 🟡 | ❌ | +| HARDSWISH | ❌ | ✅ | 🟡 | ❌ | +| IM2COL | ❌ | ✅ | ✅ | 🟡 | +| L2_NORM | ❌ | ✅ | ✅ | ✅ | +| LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | +| LOG | ❌ | ✅ | ✅ | ❌ | +| MEAN | ❌ | ✅ | ✅ | ✅ | +| MUL | ❌ | ✅ | ✅ | 🟡 | +| MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | +| MUL_MAT_ID | ❌ | ✅ | ✅ | ✅ | +| NEG | ❌ | ✅ | 🟡 | 🟡 | +| NORM | ❌ | ✅ | ✅ | 🟡 | +| OPT_STEP_ADAMW | ❌ | ✅ | ✅ | ❌ | +| OUT_PROD | 🟡 | 🟡 | 🟡 | ❌ | +| PAD | ❌ | ✅ | ✅ | ✅ | +| PAD_REFLECT_1D | ❌ | ✅ | ❌ | ✅ | +| POOL_2D | ❌ | ✅ | ✅ | ✅ | +| REGLU | ❌ | ✅ | ✅ | 🟡 | +| RELU | ❌ | ✅ | 🟡 | 🟡 | +| REPEAT | ❌ | ✅ | 🟡 | ✅ | +| REPEAT_BACK | ❌ | ✅ | ✅ | ❌ | +| RMS_NORM | ❌ | ✅ | ✅ | 🟡 | +| RMS_NORM_BACK | ❌ | ✅ | ✅ | ❌ | +| RMS_NORM_MUL | ❌ | ✅ | ✅ | ✅ | +| ROPE | ❌ | ✅ | ✅ | ✅ | +| ROPE_BACK | ❌ | ✅ | ✅ | ❌ | +| RWKV_WKV6 | ❌ | ✅ | ✅ | ✅ | +| RWKV_WKV7 | ❌ | ✅ | ✅ | ✅ | +| SCALE | ❌ | ✅ | ✅ | ✅ | +| SET | ❌ | ✅ | ❌ | ✅ | +| SET_ROWS | ❌ | 🟡 | ❌ | 🟡 | +| SGN | ❌ | ✅ | 🟡 | ❌ | +| SIGMOID | ❌ | ✅ | 🟡 | 🟡 | +| SILU | ❌ | ✅ | 🟡 | 🟡 | +| SILU_BACK | ❌ | ✅ | ✅ | ❌ | +| SIN | ❌ | ✅ | ✅ | 🟡 | +| SOFT_MAX | ❌ | ✅ | ✅ | ✅ | +| SOFT_MAX_BACK | ❌ | 🟡 | 🟡 | ❌ | +| SQR | ❌ | ✅ | ✅ | 🟡 | +| SQRT | ❌ | ✅ | ✅ | 🟡 | +| SSM_CONV | ❌ | ✅ | ✅ | ✅ | +| SSM_SCAN | ❌ | ✅ | ✅ | ✅ | +| STEP | ❌ | ✅ | 🟡 | ❌ | +| SUB | ❌ | ✅ | ✅ | 🟡 | +| SUM | ❌ | ✅ | ✅ | ❌ | +| SUM_ROWS | ❌ | ✅ | ✅ | ✅ | +| SWIGLU | ❌ | ✅ | ✅ | 🟡 | +| TANH | ❌ | ✅ | 🟡 | 🟡 | +| TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | +| UPSCALE | ❌ | ✅ | ✅ | 🟡 | diff --git a/docs/ops/BLAS.csv b/docs/ops/BLAS.csv new file mode 100644 index 0000000000000..dde13f701d83e --- /dev/null +++ b/docs/ops/BLAS.csv @@ -0,0 +1,6534 @@ +"test_time","build_commit","backend_name","op_name","op_params","test_mode","supported","passed","error_message","time_us","flops","bandwidth_gb_s","memory_kb","n_runs","device_description","backend_reg_name" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SGN","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NEG","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","STEP","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TANH","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,32],ne_kernel=[3,3,1,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,32],ne_kernel=[3,3,2,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,1024],ne_kernel=[3,3,1,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,1024],ne_kernel=[3,3,2,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2048],ne_kernel=[3,3,1,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2048],ne_kernel=[3,3,2,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[197,32,1,1],ne_kernel=[16,32,32,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=3,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=2,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGMAX","type=f32,ne=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGMAX","type=f32,ne=[100,10,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGMAX","type=f32,ne=[1024,10,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGMAX","type=f32,ne=[1024,12,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGMAX","type=f32,ne=[2000,10,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGMAX","type=f32,ne=[5438,3,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,1],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=i32,ne=[10,5,4,1],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=i16,ne=[10,5,4,1],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=i32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT","type=i16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=f32,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=f16,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=i32,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=i16,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=f32,ne=[10,10,5,1],permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=f32,ne=[10,10,10,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=f32,ne=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=f32,ne=[2,1,3,5]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=f32,ne=[2,3,5,7]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=f16,ne=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=f16,ne=[2,1,3,5]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=f16,ne=[2,3,5,7]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=bf16,ne=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=bf16,ne=[2,1,3,5]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONT","type=bf16,ne=[2,3,5,7]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ADD1","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","L2_NORM","type=f32,ne=[64,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","L2_NORM","type=f32,ne=[64,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","L2_NORM","type=f32,ne=[64,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","L2_NORM","type=f32,ne=[64,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=1.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","L2_NORM","type=f32,ne=[64,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SQR","type=f16,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SQRT","type=f16,ne=[10,3,3,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","LOG","type=f16,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIN","type=f16,ne=[10,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","COS","type=f16,ne=[10,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SQR","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SQRT","type=f32,ne=[10,3,3,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","LOG","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SIN","type=f32,ne=[10,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","COS","type=f32,ne=[10,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUM","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","MEAN","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" 
+"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" +"2025-07-10T14:15:03Z","b8a6ff407","BLAS","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Accelerate","BLAS" diff 
--git a/docs/ops/CPU.csv b/docs/ops/CPU.csv new file mode 100644 index 0000000000000..ca3222d71ebab --- /dev/null +++ b/docs/ops/CPU.csv @@ -0,0 +1,6534 @@ +"test_time","build_commit","backend_name","op_name","op_params","test_mode","supported","passed","error_message","time_us","flops","bandwidth_gb_s","memory_kb","n_runs","device_description","backend_reg_name" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SGN","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NEG","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","STEP","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","TANH","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ELU","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RELU","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","REGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 
8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,32],ne_kernel=[3,3,1,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,32],ne_kernel=[3,3,2,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,1024],ne_kernel=[3,3,1,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,1024],ne_kernel=[3,3,2,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2048],ne_kernel=[3,3,1,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2048],ne_kernel=[3,3,2,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 
3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[197,32,1,1],ne_kernel=[16,32,32,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGMAX","type=f32,ne=[2000,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGMAX","type=f32,ne=[5438,3,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=i32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=i16,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=i32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT","type=i16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=f32,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 
3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=f16,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=i32,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=i16,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=f32,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=f32,ne=[10,10,10,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=f32,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=f32,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=f32,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=f16,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=f16,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=f16,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=bf16,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=bf16,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONT","type=bf16,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ADD1","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=1.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SQR","type=f16,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SQRT","type=f16,ne=[10,3,3,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","LOG","type=f16,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIN","type=f16,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","COS","type=f16,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SQR","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SQRT","type=f32,ne=[10,3,3,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","LOG","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SIN","type=f32,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","COS","type=f32,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 
3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 
8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 
7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 
8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD 
Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core 
Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUM","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","MEAN","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 
3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" 
+"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" +"2025-07-09T15:15:35Z","26a48ad6","CPU","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","AMD Ryzen 7 3800XT 8-Core Processor","CPU" diff --git a/docs/ops/CUDA.csv b/docs/ops/CUDA.csv new file mode 100644 index 
0000000000000..e2d7d42ab5af7 --- /dev/null +++ b/docs/ops/CUDA.csv @@ -0,0 +1,6534 @@ +"test_time","build_commit","backend_name","op_name","op_params","test_mode","supported","passed","error_message","time_us","flops","bandwidth_gb_s","memory_kb","n_runs","device_description","backend_reg_name" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SGN","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NEG","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","STEP","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TANH","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,32],ne_kernel=[3,3,1,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,32],ne_kernel=[3,3,2,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,1024],ne_kernel=[3,3,1,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,1024],ne_kernel=[3,3,2,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2048],ne_kernel=[3,3,1,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2048],ne_kernel=[3,3,2,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[197,32,1,1],ne_kernel=[16,32,32,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGMAX","type=f32,ne=[2000,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGMAX","type=f32,ne=[5438,3,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=i32,ne=[10,5,4,1],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=i16,ne=[10,5,4,1],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=i32,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT","type=i16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=f32,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=f16,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=i32,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=i16,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=f32,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=f32,ne=[10,10,10,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=f32,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=f32,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=f32,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=f16,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=f16,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=f16,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=bf16,ne=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=bf16,ne=[2,1,3,5]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONT","type=bf16,ne=[2,3,5,7]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ADD1","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=1.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce 
RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 
3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SQR","type=f16,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SQRT","type=f16,ne=[10,3,3,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","LOG","type=f16,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIN","type=f16,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","COS","type=f16,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SQR","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SQRT","type=f32,ne=[10,3,3,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","LOG","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SIN","type=f32,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","COS","type=f32,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUM","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","MEAN","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA 
GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" 
+"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" +"2025-07-09T15:15:24Z","26a48ad6","CUDA0","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","NVIDIA GeForce RTX 3090","CUDA" diff --git a/docs/ops/Metal.csv b/docs/ops/Metal.csv new file mode 100644 index 0000000000000..ac45d46b3c40a --- /dev/null +++ b/docs/ops/Metal.csv @@ -0,0 +1,6534 @@ +"test_time","build_commit","backend_name","op_name","op_params","test_mode","supported","passed","error_message","time_us","flops","bandwidth_gb_s","memory_kb","n_runs","device_description","backend_reg_name" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ABS","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SGN","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NEG","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","STEP","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","TANH","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_QUICK","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSWISH","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=0,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,swapped=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[128,2,2,2],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f16,ne_a=[5,7,11,13],v=1,split","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=0,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SWIGLU","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_ERF","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,swapped=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[128,2,2,2],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GEGLU_QUICK","type=f32,ne_a=[5,7,11,13],v=1,split","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f32,n=1,m=8,r=2,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f32,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f16,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=f16,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=bf16,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=bf16,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_1,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_1,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q8_0,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q2_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q3_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q4_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q5_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=q6_K,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xxs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_xs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq2_s,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_xxs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_s,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq1_m,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_nl,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq3_s,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=iq4_xs,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=i32,n=256,m=5,r=4,b=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS","type=i32,n=256,m=5,r=4,b=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=f32,n=1,m=8,r=2,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=f32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=f16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=bf16,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q4_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q4_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q5_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q5_1,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q8_0,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q2_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q3_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q4_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q5_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=q6_K,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq2_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq2_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq2_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq3_xxs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq1_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq1_m,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq4_nl,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq3_s,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=iq4_xs,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GET_ROWS_BACK","type=i32,n=256,m=5,r=4,b=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[1,8,1,3],nr23=[1,1],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f32,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=f16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[3,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[31,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[33,5,1,1],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[3,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[31,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=bf16,ne=[33,5,1,7],nr23=[2,3],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_1,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q8_0,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q2_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q3_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q4_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q5_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=q6_K,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq2_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_xxs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq1_m,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_nl,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq3_s,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[768,3,1,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET_ROWS","type=iq4_xs,ne=[768,3,7,1],nr23=[2,3],r=2,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=avg,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=1,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=1,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=1,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=1,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=3,s1=0,p0=3,p1=0,d0=3,d1=0,is_2D=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,3,1],ne_kernel=[3,3,3,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=1,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=1,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=0,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=0,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=1,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=1,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,20,2,2],ne_kernel=[3,3,2,2],s0=3,s1=3,p0=3,p1=3,d0=3,d1=3,is_2D=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,32],ne_kernel=[3,3,1,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,32],ne_kernel=[3,3,2,32],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,1024],ne_kernel=[3,3,1,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,1024],ne_kernel=[3,3,2,1024],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2048],ne_kernel=[3,3,1,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2048],ne_kernel=[3,3,2,2048],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,1,2560],ne_kernel=[3,3,1,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_2D_DW","ne_input=[17,34,9,1],ne_kernel=[3,3,1,9],stride=1,padding=0,dilation=1,cwhn=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_2D_DW","ne_input=[32,8,64,1],ne_kernel=[3,3,1,64],stride=2,padding=1,dilation=1,cwhn=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,1,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,1,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[3,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,1,1,1],ne_kernel=[1337,9,1,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[3,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[1,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[13,7,1,1],ne_kernel=[1337,9,7,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[197,32,1,1],ne_kernel=[16,32,32,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=3,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[2,3,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=2,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_2D","ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONV_TRANSPOSE_2D","ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","COUNT_EQUAL","type=f32,ne=[4,500,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","COUNT_EQUAL","type=f32,ne=[4,5000,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGMAX","type=f32,ne=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGMAX","type=f32,ne=[100,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGMAX","type=f32,ne=[1024,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGMAX","type=f32,ne=[1024,12,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGMAX","type=f32,ne=[2000,10,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGMAX","type=f32,ne=[5438,3,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=i32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=i16,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=i32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT","type=i16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,2,1],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,2],v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=f32,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=f16,ne=[10,10,20,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=i32,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=i16,ne=[10,10,20,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=f32,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=f16,ne=[10,10,5,1],permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=f32,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=f16,ne=[10,10,5,1],permute=[1,0,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=i16,ne=[10,8,3,1],permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DUP","type=i16,ne=[10,8,3,1],permute=[1,2,0,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET","type_src=f32,type_dst=f32,ne=[6,5,4,3],dim=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SET","type_src=i32,type_dst=i32,ne=[6,5,4,3],dim=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[1,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[2,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[3,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=q4_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=q4_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=q5_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=q5_1,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=q8_0,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=q2_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=q3_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=q4_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=q5_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=q6_K,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=iq2_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=iq2_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=iq2_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=iq3_xxs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=iq1_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=iq1_m,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[32,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[64,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=iq4_nl,ne=[96,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=iq3_s,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[512,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=iq4_xs,ne=[768,2,3,4],permute_src=[0,3,1,2],permute_dst=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=bf16,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=bf16,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q4_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q4_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q4_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q4_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q5_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q5_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q5_1,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q5_1,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q8_0,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q8_0,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q2_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q2_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q3_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q3_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q4_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q4_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q5_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q5_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q6_K,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=q6_K,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq2_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq2_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq2_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq2_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq3_xxs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq1_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq1_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq1_m,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq1_m,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq4_nl,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq3_s,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq3_s,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=iq4_xs,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=bf16,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_1,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q8_0,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q2_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q3_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q4_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q5_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=q6_K,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq2_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_xxs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq1_m,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_nl,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq3_s,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,4,4,4],permute_src=[0,0,0,0],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=iq4_xs,type_dst=f32,ne=[256,2,3,4],permute_src=[0,2,1,3],permute_dst=[0,0,0,0]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f16,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f16,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CPY","type_src=f32,type_dst=f32,ne=[256,2,3,4],permute_src=[1,0,2,3],permute_dst=[0,0,0,0]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=f32,ne=[10,10,10,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=f32,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=f32,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=f32,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=f16,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=f16,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=f16,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=bf16,ne=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=bf16,ne=[2,1,3,5]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONT","type=bf16,ne=[2,3,5,7]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,8,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,1,1],nr=[32,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,320,320],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[2,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,1,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[1,1,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[1,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[10,5,4,3],nr=[2,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1280,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1280,1,1,1],nr=[1,16,16,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1280,16,16,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1280,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,1280,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[16,16,1280,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,1920,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,2560,1],nr=[16,16,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,1280,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,1920,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[1,1,640,1],nr=[32,32,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[5120,1,1,1],nr=[1,256,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f16,ne=[640,1,1,1],nr=[1,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,8,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,1,1],nr=[32,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,320,320],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[1,1,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[1,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[10,5,4,3],nr=[2,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1280,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1280,1,1,1],nr=[1,16,16,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1280,16,16,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1280,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,1280,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[16,16,1280,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,1920,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,2560,1],nr=[16,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,1280,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,1920,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[1,1,640,1],nr=[32,32,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[5120,1,1,1],nr=[1,256,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUB","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIV","type=f32,ne=[640,1,1,1],nr=[1,1,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ADD1","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SCALE","type=f32,ne=[10,10,10,10],scale=2.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SILU_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000001","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.000100","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=0,eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM","type=f32,ne=[64,5,4,3],v=1,eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_BACK","type=f32,ne=[64,5,4,3],eps=0.100000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.000100","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RMS_NORM_MUL","type=f32,ne=[64,5,4,3],eps=1.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","L2_NORM","type=f32,ne=[64,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SSM_CONV","type=f32,ne_a=[4,1536,1,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SSM_CONV","type=f32,ne_a=[8,1536,1,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SSM_CONV","type=f32,ne_a=[4,1536,4,1],ne_b=[4,1536,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SSM_SCAN","type=f32,d_state=16,head_dim=1,n_head=1024,n_group=1,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SSM_SCAN","type=f32,d_state=128,head_dim=64,n_head=16,n_group=2,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV6","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","RWKV_WKV7","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=1,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=32,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GATED_LINEAR_ATTN","type=f32,head_count=32,head_size=64,n_seq_tokens=128,n_seqs=4","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=2,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=3,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=4,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=5,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=6,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=7,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=8,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=9,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=4,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_0,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_K,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f32,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[1,1],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,1],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[1,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[3,2],nr=[2,2],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,1,3,2],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=256,bs=[2,3],nr=[1,1],per=[0,3,2,1],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=1,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=8,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xxs,type_b=f16,m=16,n=16,k=1024,bs=[3,2],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q4_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_1,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q8_0,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q2_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q3_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q5_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=q6_K,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq2_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_xxs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq1_m,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=32,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_nl,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq3_s,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=iq4_xs,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=1,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=128,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=64,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=83,n=2,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=64,n=45,k=128,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=45,k=64,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=193,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=67,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[1,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[1,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[2,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[2,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[4,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[4,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[1,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[1,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1056,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=128,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=128,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1056,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=bf16,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=1057,n=1,k=129,bs=[8,1],nr=[4,1],per=[0,2,1,3],v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT","type_a=f32,type_b=f32,m=129,n=1,k=1057,bs=[8,1],nr=[4,1],per=[0,1,2,3],v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=0,m=32,n=1024,k=16","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=32,n=1024,k=16","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=1,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=2,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=0,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=8,n_used=4,b=1,m=512,n=129,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=1,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=512,n=32,k=256","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f32,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=f16,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q8_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_0,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_1,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=q4_K,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f32,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=1,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=1,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[1,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,1],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[1,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,1],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OUT_PROD","type_a=iq2_xxs,type_b=f16,m=256,n=16,k=16,bs=[3,3],nr=[2,2],trans_b=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SQR","type=f16,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SQRT","type=f16,ne=[10,3,3,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","LOG","type=f16,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIN","type=f16,ne=[10,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","COS","type=f16,ne=[10,2,2,2]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SQR","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SQRT","type=f32,ne=[10,3,3,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","LOG","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SIN","type=f32,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","COS","type=f32,ne=[10,2,2,2]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CLAMP","type=f32,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIAG_MASK_INF","type=f32,ne=[10,10,1,1],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIAG_MASK_INF","type=f32,ne=[10,10,3,1],n_past=5","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","DIAG_MASK_INF","type=f32,ne=[10,10,3,2],n_past=5","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=1.000000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f32,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f32,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,16,1,3],mask=1,m_prec=f16,nr23=[3,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,15,1,1],mask=1,m_prec=f16,nr23=[2,3],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[15,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,16,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,15,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1024,1024,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[1023,1023,1,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[16,2,32,1],mask=0,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=0.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f32,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX","type=f32,ne=[32,2,32,1],mask=1,m_prec=f16,nr23=[1,1],scale=0.100000,max_bias=8.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=0.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=1.000000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[16,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[15,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,16,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,15,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1024,1024,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SOFT_MAX_BACK","type=f32,ne=[1023,1023,1,1],scale=0.100000,max_bias=8.000000","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,40,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,52,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,64,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,1,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,71,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,8,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=20,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,2,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,32,4,1],n_dims=32,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=128,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,12,2,1],n_dims=20,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,28,2,1],n_dims=32,mode=8,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[80,16,2,1],n_dims=80,mode=24,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.000000,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.000000,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.000000,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f32,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=0,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=0,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ROPE_BACK","type=f16,ne_a=[64,128,2,1],n_dims=64,mode=2,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGSORT","type=f32,ne=[8,1,1,1],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGSORT","type=f32,ne=[16,10,10,10],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGSORT","type=f32,ne=[60,10,10,10],order=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGSORT","type=f32,ne=[8,1,1,1],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGSORT","type=f32,ne=[16,10,10,10],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARGSORT","type=f32,ne=[60,10,10,10],order=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=nearest,transpose=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=0","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=0","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[512,512,3,2],scale_factor=2,mode=bilinear,transpose=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[5,7,11,13],ne_tgt=[2,5,7,11],mode=1","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","UPSCALE","type=f32,ne=[2,5,7,11],ne_tgt=[5,7,11,13],mode=257","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUM","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","SUM_ROWS","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","MEAN","type=f32,ne=[10,5,4,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GROUP_NORM","type=f32,ne=[64,64,320,1],num_groups=32,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","GROUP_NORM","type=f32,ne=[9,9,1280,1],num_groups=32,eps=0.000001","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","ACC","type=f32,ne_a=[256,17,1,1],ne_b=[256,16,1,1]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","PAD","type=f32,ne_a=[512,512,1,1],pad_0=1,pad_1=1","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","PAD_REFLECT_1D","type=f32,ne_a=[512,34,2,1],pad_0=10,pad_1=9","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","ARANGE","type=f32,start=0.000000,stop=10.000000,step=1.000000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","TIMESTEP_EMBEDDING","type=f32,ne_a=[2,1,1,1],dim=320,max_period=10000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","LEAKY_RELU","type=f32,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,3],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[4,3],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=80,hsv=80,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[16,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=10.000000,prec=def,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=128,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=192,hsv=192,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple 
M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 
Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=256,hsv=256,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","1","1","yes","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,2,1,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=1,max_bias=8.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[1,1],kv=1024,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=1,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=3,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=32,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=f16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=bf16,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q8_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","FLASH_ATTN_EXT","hsk=576,hsv=512,nh=4,nr23=[4,1],kv=512,nb=35,mask=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_KV=q4_0,permute=[0,1,2,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CROSS_ENTROPY_LOSS","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CROSS_ENTROPY_LOSS","type=f32,ne=[30000,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" 
+"2025-07-10T14:14:27Z","b8a6ff407","Metal","CROSS_ENTROPY_LOSS_BACK","type=f32,ne=[30000,1,1,1]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" +"2025-07-10T14:14:27Z","b8a6ff407","Metal","OPT_STEP_ADAMW","type=f32,ne=[10,5,4,3]","support","0","0","no","0.000000","0.000000","0.000000","0","0","Apple M2 Ultra","Metal" diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index 49e4d2cf8c198..11ff38762b848 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -33,6 +33,7 @@ else() add_subdirectory(speculative-simple) add_subdirectory(gen-docs) add_subdirectory(training) + add_subdirectory(diffusion) if (NOT GGML_BACKEND_DL) add_subdirectory(convert-llama2c-to-ggml) # these examples use the backends directly and cannot be built with dynamic loading diff --git a/examples/Miku.sh b/examples/Miku.sh index 0f6c8c8787107..9492bfedc03e7 100755 --- a/examples/Miku.sh +++ b/examples/Miku.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e AI_NAME="${AI_NAME:-Miku}" diff --git a/examples/chat-13B.sh b/examples/chat-13B.sh index 1828903c31670..f025a47cbfea3 100755 --- a/examples/chat-13B.sh +++ b/examples/chat-13B.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e diff --git a/examples/chat-persistent.sh b/examples/chat-persistent.sh index 9d761ebb843af..d6b6cb9518258 100755 --- a/examples/chat-persistent.sh +++ b/examples/chat-persistent.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail diff --git a/examples/chat-vicuna.sh b/examples/chat-vicuna.sh index ffdd200849503..c930962fd3203 100755 --- a/examples/chat-vicuna.sh +++ b/examples/chat-vicuna.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e diff --git a/examples/chat.sh b/examples/chat.sh index 9f85d1e265d00..5fec46d17ba40 100755 --- a/examples/chat.sh +++ b/examples/chat.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Temporary script - will be removed in the future diff --git 
a/examples/diffusion/CMakeLists.txt b/examples/diffusion/CMakeLists.txt new file mode 100644 index 0000000000000..396549c8029d9 --- /dev/null +++ b/examples/diffusion/CMakeLists.txt @@ -0,0 +1,5 @@ +set(TARGET llama-diffusion-cli) +add_executable(${TARGET} diffusion-cli.cpp) +install(TARGETS ${TARGET} RUNTIME) +target_link_libraries(${TARGET} PRIVATE llama common ${CMAKE_THREAD_LIBS_INIT}) +target_compile_features(${TARGET} PRIVATE cxx_std_17) diff --git a/examples/diffusion/diffusion-cli.cpp b/examples/diffusion/diffusion-cli.cpp new file mode 100644 index 0000000000000..3e11ce1160b05 --- /dev/null +++ b/examples/diffusion/diffusion-cli.cpp @@ -0,0 +1,507 @@ +#include "arg.h" +#include "chat.h" +#include "common.h" +#include "llama.h" +#include "log.h" + +#include +#include +#include +#include +#include +#include +#include + +typedef bool (*diffusion_step_callback_t)(int32_t step, + int32_t total_steps, + const llama_token * tokens, + int32_t n_tokens, + void * user_data); + +enum diffusion_alg { + DIFFUSION_ALG_ORIGIN = 0, + DIFFUSION_ALG_MASKGIT_PLUS = 1, + DIFFUSION_ALG_TOPK_MARGIN = 2, + DIFFUSION_ALG_ENTROPY = 3, +}; + +struct diffusion_params { + int32_t steps; + float eps; + float temperature; + float top_p; + int32_t top_k; + llama_token mask_token_id; + enum diffusion_alg algorithm; + float alg_temp; + diffusion_step_callback_t step_callback; + void * step_callback_user_data; + int32_t seed; +}; + + +static diffusion_params diffusion_default_params() { + diffusion_params params = {}; + params.steps = 64; + params.eps = 1e-3f; + params.temperature = 0.2f; + params.top_p = 0.95f; + params.top_k = 0; + params.mask_token_id = LLAMA_TOKEN_NULL; + params.algorithm = DIFFUSION_ALG_ORIGIN; + params.alg_temp = 0.0f; + params.step_callback = nullptr; + params.step_callback_user_data = nullptr; + params.seed = 0; + return params; +} + +static void diffusion_generate(llama_context * ctx, + const llama_token * input_tokens, + llama_token * output_tokens, + int32_t 
n_input, + int32_t max_length, + struct diffusion_params params, + int32_t & n_generated) { + + n_generated = 0; + if (!ctx || !input_tokens || !output_tokens || n_input <= 0 || max_length <= n_input) { + return; + } + + const llama_model * model = llama_get_model(ctx); + + // Initialize with input and pad with mask tokens + std::copy(input_tokens, input_tokens + n_input, output_tokens); + std::fill(output_tokens + n_input, output_tokens + max_length, params.mask_token_id); + + std::mt19937 rng(params.seed); + + std::vector timesteps(params.steps + 1); + for (int32_t i = 0; i <= params.steps; i++) { + timesteps[i] = 1.0f - (float) i / params.steps * (1.0f - params.eps); + } + + llama_set_causal_attn(ctx, false); + + int32_t n_vocab = llama_vocab_n_tokens(llama_model_get_vocab(model)); + + std::vector candidates(n_vocab); + + std::vector conf_candidates; + conf_candidates.reserve(max_length); + + std::vector mask_positions; + mask_positions.reserve(max_length); + + struct llama_sampler * sampler = llama_sampler_chain_init(llama_sampler_chain_default_params()); + if (params.top_k > 0) { + llama_sampler_chain_add(sampler, llama_sampler_init_top_k(params.top_k)); + } + if (params.top_p < 1.0f) { + llama_sampler_chain_add(sampler, llama_sampler_init_top_p(params.top_p, 1)); + } + if (params.temperature > 0.0f) { + llama_sampler_chain_add(sampler, llama_sampler_init_temp(params.temperature)); + } + llama_sampler_chain_add(sampler, llama_sampler_init_dist(params.seed)); + + struct llama_sampler * dist_sampler = llama_sampler_init_dist(params.seed); + + llama_batch batch = llama_batch_init(max_length, 0, 1); + batch.n_tokens = max_length; + + int64_t total_sampling_time = 0; + int64_t total_time = 0; + + int64_t time_start = ggml_time_us(); + for (int32_t step = 0; step < params.steps; step++) { + if (params.step_callback) { + if (!params.step_callback(step, params.steps, output_tokens, max_length, params.step_callback_user_data)) { + break; + } + } + + for (int32_t i = 0; 
i < max_length; i++) { + batch.token[i] = output_tokens[i]; + batch.pos[i] = i; + batch.n_seq_id[i] = 1; + batch.seq_id[i][0] = 0; + batch.logits[i] = 1; + } + + int ret = llama_decode(ctx, batch); + if (ret != 0) { + LOG_ERR("%s: failed to decode at step %d, ret = %d\n", __func__, step, ret); + break; + } + + float * raw_logits = llama_get_logits(ctx); + if (!raw_logits) { + LOG_ERR("%s: failed to get logits at step %d\n", __func__, step); + break; + } + + auto get_logits_for_pos = [&](int32_t pos) -> const float * { + return pos == 0 ? raw_logits : raw_logits + (pos - 1) * n_vocab; + }; + + int64_t time_start_sampling = ggml_time_us(); + + mask_positions.clear(); + for (int32_t i = 0; i < max_length; i++) { + if (output_tokens[i] == params.mask_token_id) { + mask_positions.push_back(i); + } + } + + if (mask_positions.empty()) { + break; + } + + float t = timesteps[step]; + float s = timesteps[step + 1]; + + if (params.algorithm == DIFFUSION_ALG_ORIGIN) { + float p_transfer = (step < params.steps - 1) ? 
(1.0f - s / t) : 1.0f; + + for (int32_t pos : mask_positions) { + if (std::uniform_real_distribution(0.0f, 1.0f)(rng) < p_transfer) { + const float * pos_logits = get_logits_for_pos(pos); + for (int32_t token_id = 0; token_id < n_vocab; token_id++) { + candidates[token_id].id = token_id; + candidates[token_id].logit = pos_logits[token_id]; + candidates[token_id].p = 0.0f; + } + + llama_token_data_array cur_p = { + /* .data = */ candidates.data(), + /* .size = */ (size_t) n_vocab, // Reset size to full vocab + /* .selected = */ -1, + /* .sorted = */ false, + }; + + llama_sampler_apply(sampler, &cur_p); + output_tokens[pos] = cur_p.data[cur_p.selected].id; + } + } + } else { + std::vector> confidences; + std::vector sampled_tokens(mask_positions.size()); + + for (size_t i = 0; i < mask_positions.size(); i++) { + int32_t pos = mask_positions[i]; + const float * pos_logits = get_logits_for_pos(pos); + + for (int32_t token_id = 0; token_id < n_vocab; token_id++) { + candidates[token_id].logit = pos_logits[token_id]; + candidates[token_id].p = 0.0f; + candidates[token_id].id = token_id; + } + + llama_token_data_array cur_p = { + /* .data = */ candidates.data(), + /* .size = */ candidates.size(), + /* .selected = */ -1, + /* .sorted = */ false, + }; + + llama_sampler_apply(sampler, &cur_p); + + llama_token sampled_token = cur_p.data[cur_p.selected].id; + + float confidence = 0.0f; + if (params.algorithm == DIFFUSION_ALG_ENTROPY) { + const float epsilon = 1e-10f; + for (size_t j = 0; j < cur_p.size; j++) { + float prob = cur_p.data[j].p; + confidence += prob * logf(prob + epsilon); + } + } else if (params.algorithm == DIFFUSION_ALG_TOPK_MARGIN) { + confidence = cur_p.data[0].p - cur_p.data[1].p; + } else { + confidence = cur_p.data[cur_p.selected].p; + } + + sampled_tokens[i] = sampled_token; + confidences.emplace_back(confidence, i); + } + + int32_t num_transfer = + (step < params.steps - 1) ? 
(int32_t) (mask_positions.size() * (1.0f - s / t)) : mask_positions.size(); + + if (num_transfer > 0) { + if (params.alg_temp == 0.0f) { + std::partial_sort(confidences.begin(), confidences.begin() + num_transfer, confidences.end(), + [](const std::pair & a, const std::pair & b) { + if (a.first != b.first) { + return a.first > b.first; + } + return a.second < b.second; + }); + } else { + conf_candidates.clear(); + + for (int32_t pos = 0; pos < max_length; pos++) { + float conf_logit = -std::numeric_limits::infinity(); + + auto it = std::find(mask_positions.begin(), mask_positions.end(), pos); + if (it != mask_positions.end()) { + size_t mask_idx = std::distance(mask_positions.begin(), it); + conf_logit = confidences[mask_idx].first / params.alg_temp; // Apply temperature scaling + } + + conf_candidates.emplace_back(llama_token_data{ pos, conf_logit, 0.0f }); + } + + llama_token_data_array conf_array = { + /* .data = */ conf_candidates.data(), + /* .size = */ conf_candidates.size(), + /* .selected = */ -1, + /* .sorted = */ false, + }; + + for (int32_t i = 0; i < num_transfer; i++) { + // Apply distribution sampler to get selected index + llama_sampler_apply(dist_sampler, &conf_array); + int selected_idx = conf_array.selected; + confidences[i].second = conf_candidates[selected_idx].id; + + conf_candidates[selected_idx].p = 0.0f; + conf_array.selected = -1; + } + } + + if (params.alg_temp == 0.0f) { + // Deterministic - use confidence order + for (int32_t i = 0; i < num_transfer; i++) { + int32_t mask_idx = confidences[i].second; + int32_t pos = mask_positions[mask_idx]; + llama_token token = sampled_tokens[mask_idx]; + output_tokens[pos] = token; + } + } else { + for (int32_t i = 0; i < num_transfer; i++) { + int32_t pos = confidences[i].second; + auto it = std::find(mask_positions.begin(), mask_positions.end(), pos); + if (it != mask_positions.end()) { + int32_t mask_idx = std::distance(mask_positions.begin(), it); + output_tokens[pos] = sampled_tokens[mask_idx]; + 
} + } + } + } + } + int64_t time_end_sampling = ggml_time_us(); + total_sampling_time += time_end_sampling - time_start_sampling; + } + int64_t time_end = ggml_time_us(); + total_time += time_end - time_start; + + LOG_INF("\ntotal time: %0.2fms, time per step: %0.2fms, sampling time per step: %0.2fms\n", + total_time / 1000.0, total_time / 1000.0 / params.steps, total_sampling_time / 1000.0 / params.steps); + + + llama_batch_free(batch); + llama_sampler_free(sampler); + llama_sampler_free(dist_sampler); + + n_generated = max_length; +} + + + + +static std::string format_input_text(const std::string & prompt, bool use_chat_template, llama_model * model) { + if (!use_chat_template) { + return prompt; + } + + auto chat_templates = common_chat_templates_init(model, ""); + + common_chat_templates_inputs inputs; + common_chat_msg user_msg; + user_msg.role = "user"; + user_msg.content = prompt; + inputs.add_generation_prompt = true; + inputs.messages.push_back(user_msg); + + auto result = common_chat_templates_apply(chat_templates.get(), inputs); + + return result.prompt; +} + +struct callback_data { + const common_params_diffusion * diff_params; + const llama_vocab * vocab; + int32_t n_input; +}; + +static bool diffusion_step_callback(int32_t step, + int32_t total_steps, + const llama_token * tokens, + int32_t n_tokens, + void * user_data) { + (void)user_data; + + callback_data * data = static_cast(user_data); + + auto print_progress_bar = [](int32_t step, int32_t total_steps) { + int progress_percent = (step * 100) / total_steps; + int progress_bars = (step * 50) / total_steps; + LOG_INF("\rdiffusion step: %d/%d [%s%s] %d%%", + step, + total_steps, + std::string(progress_bars, '=').c_str(), + std::string(50 - progress_bars, ' ').c_str(), + progress_percent); + }; + + if (data->diff_params->visual_mode) { + // Visual mode: clear + LOG_INF("\033[2J\033[H"); // Clear screen and move cursor to top-left + + print_progress_bar(step, total_steps); + + LOG_INF("\n"); + + 
std::string current_text = " "; + + for (int32_t i = data->n_input; i < n_tokens; i++) { + std::string token_str; + if (tokens[i] != llama_vocab_mask(data->vocab)) { + char piece[256]; + int n_chars = llama_token_to_piece(data->vocab, tokens[i], piece, sizeof(piece), 0, false); + if (n_chars > 0) { + piece[n_chars] = '\0'; + token_str = piece; + } + } else { + token_str = " "; + } + + current_text += token_str; + } + + LOG_INF("%s\n", current_text.c_str()); + } else { + print_progress_bar(step, total_steps); + } + + return true; +} + +int main(int argc, char ** argv) { + ggml_time_init(); + + common_params params; + + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_DIFFUSION)) { + return 1; + } + + const char * alg_names[] = { "ORIGIN", "MASKGIT_PLUS", "TOPK_MARGIN", "ENTROPY" }; + const char * alg_name = (params.diffusion.algorithm >= 0 && params.diffusion.algorithm <= 3) ? + alg_names[params.diffusion.algorithm] : + "UNKNOWN"; + + common_init(); + llama_backend_init(); + + llama_model_params model_params = llama_model_default_params(); + model_params.n_gpu_layers = params.n_gpu_layers; + model_params.devices = params.devices.data(); + model_params.use_mmap = params.use_mmap; + model_params.use_mlock = params.use_mlock; + model_params.check_tensors = params.check_tensors; + + llama_model * model = llama_model_load_from_file(params.model.path.c_str(), model_params); + if (!model) { + LOG_ERR("error: failed to load model '%s'\n", params.model.path.c_str()); + return 1; + } + + llama_context_params ctx_params = llama_context_default_params(); + ctx_params.n_ctx = params.n_ctx; + ctx_params.n_batch = params.n_batch; + ctx_params.n_ubatch = params.n_ubatch; + ctx_params.flash_attn = params.flash_attn; + ctx_params.no_perf = params.no_perf; + ctx_params.type_k = params.cache_type_k; + ctx_params.type_v = params.cache_type_v; + + llama_context * ctx = llama_init_from_model(model, ctx_params); + if (!ctx) { + LOG_ERR("error: failed to create context\n"); + 
llama_model_free(model); + return 1; + } + + llama_set_n_threads(ctx, params.cpuparams.n_threads, params.cpuparams_batch.n_threads); + + const llama_vocab * vocab = llama_model_get_vocab(model); + std::string formatted_prompt = format_input_text(params.prompt, params.enable_chat_template, model); + + std::vector input_tokens = common_tokenize(vocab, formatted_prompt, + /*add special tokens*/ true, + /*parse special*/ true); + int n_input = input_tokens.size(); + + if (n_input >= params.n_ctx) { + LOG_ERR("error: input too long (%d tokens), max context is %d\n", n_input, params.n_ctx); + llama_free(ctx); + llama_model_free(model); + return 1; + } + + struct diffusion_params ldiff_params = diffusion_default_params(); + ldiff_params.steps = params.diffusion.steps; + ldiff_params.eps = params.diffusion.eps; + ldiff_params.temperature = params.sampling.temp; + ldiff_params.top_p = params.sampling.top_p; + ldiff_params.top_k = params.sampling.top_k; + ldiff_params.algorithm = static_cast(params.diffusion.algorithm); + ldiff_params.alg_temp = params.diffusion.alg_temp; + ldiff_params.seed = params.sampling.seed; + + llama_token mask_token_id = llama_vocab_mask(vocab); + GGML_ASSERT(mask_token_id != LLAMA_TOKEN_NULL); + + LOG_INF("diffusion_params: - %-25s llama_token = %d\n", "mask_token_id", mask_token_id); + LOG_INF("diffusion_params: - %-25s u32 = %d\n", "steps", params.diffusion.steps); + LOG_INF("diffusion_params: - %-25s f32 = %.6f\n", "eps", params.diffusion.eps); + LOG_INF("diffusion_params: - %-25s u32 = %d (%s)\n", "algorithm", params.diffusion.algorithm, + alg_name); + LOG_INF("diffusion_params: - %-25s f32 = %.3f\n", "alg_temp", params.diffusion.alg_temp); + + ldiff_params.mask_token_id = mask_token_id; + + callback_data cb_data = { ¶ms.diffusion, vocab, n_input }; + + ldiff_params.step_callback = diffusion_step_callback; + ldiff_params.step_callback_user_data = &cb_data; + + int32_t n_generated = 0; + + std::vector output_tokens(params.n_ubatch); + 
diffusion_generate(ctx, input_tokens.data(), output_tokens.data(), n_input, params.n_ubatch, + ldiff_params, n_generated); + + if (n_generated > 0) { + if (params.diffusion.visual_mode) { + //clear screen and move cursor to top-left + LOG_INF("\033[2J\033[H"); + } + output_tokens.erase(output_tokens.begin(), output_tokens.begin() + n_input); + std::string output_data = common_detokenize(vocab, output_tokens, false); + LOG_INF("\n%s\n", output_data.c_str()); + } else { + LOG_INF("Error: diffusion generation failed\n"); + } + + llama_free(ctx); + llama_model_free(model); + llama_backend_free(); + + return 0; +} diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 681929d27d617..40ff6483807ee 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -107,7 +107,7 @@ int main(int argc, char ** argv) { const llama_vocab * vocab = llama_model_get_vocab(model); const int n_ctx_train = llama_model_n_ctx_train(model); - const int n_ctx = llama_n_ctx(ctx); + const int n_ctx = llama_n_ctx(ctx); const enum llama_pooling_type pooling_type = llama_pooling_type(ctx); @@ -133,10 +133,36 @@ int main(int argc, char ** argv) { // max batch size const uint64_t n_batch = params.n_batch; + // get added sep and eos token, if any + const std::string added_sep_token = llama_vocab_get_add_sep(vocab) ? llama_vocab_get_text(vocab, llama_vocab_sep(vocab)) : ""; + const std::string added_eos_token = llama_vocab_get_add_eos(vocab) ? 
llama_vocab_get_text(vocab, llama_vocab_eos(vocab)) : ""; + // tokenize the prompts and trim std::vector> inputs; for (const auto & prompt : prompts) { - auto inp = common_tokenize(ctx, prompt, true, true); + std::vector inp; + + // split classification pairs and insert expected separator tokens + if (pooling_type == LLAMA_POOLING_TYPE_RANK && prompt.find(params.cls_sep) != std::string::npos) { + std::vector pairs = split_lines(prompt, params.cls_sep); + std::string final_prompt; + + for (size_t i = 0; i < pairs.size(); i++) { + final_prompt += pairs[i]; + if (i != pairs.size() - 1) { + if (!added_eos_token.empty()) { + final_prompt += added_eos_token; + } + if (!added_sep_token.empty()) { + final_prompt += added_sep_token; + } + } + } + + inp = common_tokenize(ctx, final_prompt, true, true); + } else { + inp = common_tokenize(ctx, prompt, true, true); + } if (inp.size() > n_batch) { LOG_ERR("%s: number of tokens in input line (%lld) exceeds batch size (%lld), increase batch size and re-run\n", __func__, (long long int) inp.size(), (long long int) n_batch); @@ -145,11 +171,11 @@ int main(int argc, char ** argv) { inputs.push_back(inp); } - // check if the last token is SEP + // check if the last token is SEP/EOS // it should be automatically added by the tokenizer when 'tokenizer.ggml.add_eos_token' is set to 'true' for (auto & inp : inputs) { - if (inp.empty() || inp.back() != llama_vocab_sep(vocab)) { - LOG_WRN("%s: last token in the prompt is not SEP\n", __func__); + if (inp.empty() || (inp.back() != llama_vocab_sep(vocab) && inp.back() != llama_vocab_eos(vocab))) { + LOG_WRN("%s: last token in the prompt is not SEP or EOS\n", __func__); LOG_WRN("%s: 'tokenizer.ggml.add_eos_token' should be set to 'true' in the GGUF header\n", __func__); } } diff --git a/examples/eval-callback/eval-callback.cpp b/examples/eval-callback/eval-callback.cpp index fb188f5a9e132..4afd80eb454ad 100644 --- a/examples/eval-callback/eval-callback.cpp +++ 
b/examples/eval-callback/eval-callback.cpp @@ -55,6 +55,8 @@ static void ggml_print_tensor(uint8_t * data, ggml_type type, const int64_t * ne v = ggml_fp16_to_fp32(*(ggml_fp16_t *) &data[i]); } else if (type == GGML_TYPE_F32) { v = *(float *) &data[i]; + } else if (type == GGML_TYPE_I64) { + v = (float) *(int64_t *) &data[i]; } else if (type == GGML_TYPE_I32) { v = (float) *(int32_t *) &data[i]; } else if (type == GGML_TYPE_I16) { @@ -134,6 +136,11 @@ static bool run(llama_context * ctx, const common_params & params) { std::vector tokens = common_tokenize(ctx, params.prompt, add_bos); + if (tokens.empty()) { + LOG_ERR("%s : there are not input tokens to process - (try to provide a prompt with '-p')\n", __func__); + return false; + } + if (llama_decode(ctx, llama_batch_get_one(tokens.data(), tokens.size()))) { LOG_ERR("%s : failed to eval\n", __func__); return false; diff --git a/examples/gritlm/gritlm.cpp b/examples/gritlm/gritlm.cpp index 041da61c743c1..bdab052c3390f 100644 --- a/examples/gritlm/gritlm.cpp +++ b/examples/gritlm/gritlm.cpp @@ -41,12 +41,11 @@ static std::vector> encode(llama_context * ctx, const std::ve // add input to batch (this increments n_tokens) for (int32_t j = 0; j < n_toks; j++) { - common_batch_add(batch, inputs[j], j, { 0 }, j >= n_inst); + common_batch_add(batch, inputs[j], j, { 0 }, true); } // clear previous kv_cache values (irrelevant for embeddings) llama_memory_clear(llama_get_memory(ctx), true); - llama_set_embeddings(ctx, true); llama_set_causal_attn(ctx, false); // run model @@ -103,7 +102,6 @@ static std::string generate(llama_context * ctx, llama_sampler * smpl, const std llama_token eos_token = llama_vocab_eos(vocab); llama_memory_clear(llama_get_memory(ctx), true); - llama_set_embeddings(ctx, false); llama_set_causal_attn(ctx, true); llama_batch bat = llama_batch_init(llama_n_batch(ctx), 0, 1); @@ -166,6 +164,8 @@ int main(int argc, char * argv[]) { llama_model_params mparams = common_model_params_to_llama(params); 
llama_context_params cparams = common_context_params_to_llama(params); + cparams.embeddings = true; + llama_backend_init(); llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams); @@ -213,6 +213,8 @@ int main(int argc, char * argv[]) { std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[1].c_str(), documents[1].c_str(), cosine_sim_q1_d1); } + llama_set_embeddings(ctx, false); + // ### Generation ### // GritLM models are not finetuned with system prompts, as you can just include system-like instructions together with your user instruction { diff --git a/examples/jeopardy/jeopardy.sh b/examples/jeopardy/jeopardy.sh index 07bcb3b8d78ac..800df2c6aee7d 100755 --- a/examples/jeopardy/jeopardy.sh +++ b/examples/jeopardy/jeopardy.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e MODEL=./models/ggml-vicuna-13b-1.1-q4_0.bin diff --git a/examples/parallel/parallel.cpp b/examples/parallel/parallel.cpp index d53e089a4cbc2..46fb451baa712 100644 --- a/examples/parallel/parallel.cpp +++ b/examples/parallel/parallel.cpp @@ -224,6 +224,7 @@ int main(int argc, char ** argv) { auto & client = clients[i]; client.id = i; client.smpl = common_sampler_init(model, params.sampling); + //params.sampling.seed++; } std::vector tokens_system; @@ -345,7 +346,7 @@ int main(int argc, char ** argv) { client.n_decoded = 0; client.i_batch = batch.n_tokens - 1; - LOG_INF("\033[31mClient %3d, seq %4d, junk = %4d, started decoding ...\033[0m\n", client.id, client.seq_id, n_junk_cur); + LOG_INF("\033[31mClient %3d, seq %4d, junk = %4d, prompt = %d, started decoding ...\033[0m\n", client.id, client.seq_id, n_junk_cur, client.n_prompt); g_seq_id += 1; diff --git a/examples/reason-act.sh b/examples/reason-act.sh index 06d592799cf12..3c801920d0195 100755 --- a/examples/reason-act.sh +++ b/examples/reason-act.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cd `dirname $0` cd .. 
diff --git a/examples/server-llama2-13B.sh b/examples/server-llama2-13B.sh index 4ce79b7fac477..fd5a575886f05 100755 --- a/examples/server-llama2-13B.sh +++ b/examples/server-llama2-13B.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e diff --git a/examples/simple-chat/simple-chat.cpp b/examples/simple-chat/simple-chat.cpp index 2aee0a919e60d..57195df331628 100644 --- a/examples/simple-chat/simple-chat.cpp +++ b/examples/simple-chat/simple-chat.cpp @@ -98,7 +98,7 @@ int main(int argc, char ** argv) { auto generate = [&](const std::string & prompt) { std::string response; - const bool is_first = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) == 0; + const bool is_first = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) == -1; // tokenize the prompt const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true); @@ -113,15 +113,16 @@ int main(int argc, char ** argv) { while (true) { // check if we have enough space in the context to evaluate this batch int n_ctx = llama_n_ctx(ctx); - int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0); + int n_ctx_used = llama_memory_seq_pos_max(llama_get_memory(ctx), 0) + 1; if (n_ctx_used + batch.n_tokens > n_ctx) { printf("\033[0m\n"); fprintf(stderr, "context size exceeded\n"); exit(0); } - if (llama_decode(ctx, batch)) { - GGML_ABORT("failed to decode\n"); + int ret = llama_decode(ctx, batch); + if (ret != 0) { + GGML_ABORT("failed to decode, ret = %d\n", ret); } // sample the next token diff --git a/examples/sycl/build.sh b/examples/sycl/build.sh index e72b2e2612f0d..1993520ebdaed 100755 --- a/examples/sycl/build.sh +++ b/examples/sycl/build.sh @@ -1,4 +1,4 @@ - +#!/usr/bin/env bash # MIT license # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: MIT diff --git a/examples/sycl/run-llama2.sh b/examples/sycl/run-llama2.sh index 40ce8f5b2b7b5..37195008de70f 100755 --- a/examples/sycl/run-llama2.sh +++ b/examples/sycl/run-llama2.sh @@ -1,4 
+1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # MIT license # Copyright (C) 2024 Intel Corporation diff --git a/examples/sycl/run-llama3.sh b/examples/sycl/run-llama3.sh index 933d1b98bc075..8e21b017f4ca5 100755 --- a/examples/sycl/run-llama3.sh +++ b/examples/sycl/run-llama3.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # MIT license # Copyright (C) 2025 Intel Corporation diff --git a/examples/ts-type-to-grammar.sh b/examples/ts-type-to-grammar.sh index 9abba2a3daa7d..966050407888e 100755 --- a/examples/ts-type-to-grammar.sh +++ b/examples/ts-type-to-grammar.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # ./examples/ts-type-to-grammar.sh "{a:string,b:string,c?:string}" # python examples/json_schema_to_grammar.py https://json.schemastore.org/tsconfig.json diff --git a/ggml/CMakeLists.txt b/ggml/CMakeLists.txt index 727139cf385b7..de6d789c98a03 100644 --- a/ggml/CMakeLists.txt +++ b/ggml/CMakeLists.txt @@ -131,6 +131,7 @@ option(GGML_RVV "ggml: enable rvv" ON) option(GGML_RV_ZFH "ggml: enable riscv zfh" OFF) option(GGML_XTHEADVECTOR "ggml: enable xtheadvector" OFF) option(GGML_VXE "ggml: enable vxe" ON) +option(GGML_NNPA "ggml: enable nnpa" ON) option(GGML_CPU_ALL_VARIANTS "ggml: build all variants of the CPU backend (requires GGML_BACKEND_DL)" OFF) set(GGML_CPU_ARM_ARCH "" CACHE STRING "ggml: CPU architecture for ARM") @@ -172,6 +173,7 @@ option(GGML_HIP "ggml: use HIP" option(GGML_HIP_GRAPHS "ggml: use HIP graph, experimental, slow" OFF) option(GGML_HIP_NO_VMM "ggml: do not try to use HIP VMM" ON) option(GGML_HIP_ROCWMMA_FATTN "ggml: enable rocWMMA for FlashAttention" OFF) +option(GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 "ggml: enable rocWMMA FlashAttention on GFX12" OFF) option(GGML_VULKAN "ggml: use Vulkan" OFF) option(GGML_VULKAN_CHECK_RESULTS "ggml: run Vulkan op checks" OFF) option(GGML_VULKAN_DEBUG "ggml: enable Vulkan debug output" OFF) @@ -179,7 +181,8 @@ option(GGML_VULKAN_MEMORY_DEBUG "ggml: enable Vulkan memory debug ou 
option(GGML_VULKAN_SHADER_DEBUG_INFO "ggml: enable Vulkan shader debug info" OFF) option(GGML_VULKAN_VALIDATE "ggml: enable Vulkan validation" OFF) option(GGML_VULKAN_RUN_TESTS "ggml: run Vulkan tests" OFF) -option(GGML_KOMPUTE "ggml: use Kompute" OFF) +option(GGML_WEBGPU "ggml: use WebGPU" OFF) +option(GGML_WEBGPU_DEBUG "ggml: enable WebGPU debug output" OFF) option(GGML_METAL "ggml: use Metal" ${GGML_METAL_DEFAULT}) option(GGML_METAL_USE_BF16 "ggml: use bfloat if available" OFF) option(GGML_METAL_NDEBUG "ggml: disable Metal debugging" OFF) @@ -264,12 +267,12 @@ set(GGML_PUBLIC_HEADERS include/ggml-cann.h include/ggml-cpp.h include/ggml-cuda.h - include/ggml-kompute.h include/ggml-opt.h include/ggml-metal.h include/ggml-rpc.h include/ggml-sycl.h include/ggml-vulkan.h + include/ggml-webgpu.h include/gguf.h) set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") @@ -358,6 +361,13 @@ write_basic_package_version_file( VERSION ${GGML_INSTALL_VERSION} COMPATIBILITY SameMajorVersion) +target_compile_definitions(ggml-base PRIVATE + GGML_VERSION="${GGML_INSTALL_VERSION}" + GGML_COMMIT="${GGML_BUILD_COMMIT}" +) +message(STATUS "ggml version: ${GGML_INSTALL_VERSION}") +message(STATUS "ggml commit: ${GGML_BUILD_COMMIT}") + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml-config.cmake ${CMAKE_CURRENT_BINARY_DIR}/ggml-version.cmake DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/ggml) @@ -367,6 +377,8 @@ if (MSVC) /wd4005 # Macro redefinition /wd4244 # Conversion from one type to another type, possible loss of data /wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data + /wd4305 # Conversion from 'type1' to 'type2', possible loss of data + /wd4566 # Conversion from 'char' to 'wchar_t', possible loss of data /wd4996 # Disable POSIX deprecation warnings /wd4702 # Unreachable code warnings ) @@ -386,4 +398,46 @@ if (MSVC) disable_msvc_warnings(ggml-cpu-skylakex) disable_msvc_warnings(ggml-cpu-icelake) disable_msvc_warnings(ggml-cpu-alderlake) + 
+ if (GGML_BUILD_EXAMPLES) + disable_msvc_warnings(common-ggml) + disable_msvc_warnings(common) + + disable_msvc_warnings(mnist-common) + disable_msvc_warnings(mnist-eval) + disable_msvc_warnings(mnist-train) + + disable_msvc_warnings(gpt-2-ctx) + disable_msvc_warnings(gpt-2-alloc) + disable_msvc_warnings(gpt-2-backend) + disable_msvc_warnings(gpt-2-sched) + disable_msvc_warnings(gpt-2-quantize) + disable_msvc_warnings(gpt-2-batched) + + disable_msvc_warnings(gpt-j) + disable_msvc_warnings(gpt-j-quantize) + + disable_msvc_warnings(magika) + disable_msvc_warnings(yolov3-tiny) + disable_msvc_warnings(sam) + + disable_msvc_warnings(simple-ctx) + disable_msvc_warnings(simple-backend) + endif() + + if (GGML_BUILD_TESTS) + disable_msvc_warnings(test-mul-mat) + disable_msvc_warnings(test-arange) + disable_msvc_warnings(test-backend-ops) + disable_msvc_warnings(test-cont) + disable_msvc_warnings(test-conv-transpose) + disable_msvc_warnings(test-conv-transpose-1d) + disable_msvc_warnings(test-conv1d) + disable_msvc_warnings(test-conv2d) + disable_msvc_warnings(test-conv2d-dw) + disable_msvc_warnings(test-customop) + disable_msvc_warnings(test-dup) + disable_msvc_warnings(test-opt) + disable_msvc_warnings(test-pool) + endif () endif() diff --git a/ggml/cmake/common.cmake b/ggml/cmake/common.cmake index bb1ec9b37a7f0..cb66388332040 100644 --- a/ggml/cmake/common.cmake +++ b/ggml/cmake/common.cmake @@ -36,8 +36,7 @@ function(ggml_get_system_arch) (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64|amd64)$")) set(GGML_SYSTEM_ARCH "x86" PARENT_SCOPE) - elseif ("${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "ppc64le " OR - "${CMAKE_SYSTEM_PROCESSOR} " STREQUAL "powerpc ") + elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc|power") set(GGML_SYSTEM_ARCH "PowerPC" PARENT_SCOPE) elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64") set(GGML_SYSTEM_ARCH "loongarch64" PARENT_SCOPE) diff --git a/ggml/include/ggml-backend.h 
b/ggml/include/ggml-backend.h index 778927f68217a..a2977ea2e56d9 100644 --- a/ggml/include/ggml-backend.h +++ b/ggml/include/ggml-backend.h @@ -339,7 +339,7 @@ extern "C" { typedef bool (*ggml_backend_eval_callback)(int node_index, struct ggml_tensor * t1, struct ggml_tensor * t2, void * user_data); // Compare the output of two backends - GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data); + GGML_API bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node); // Tensor initialization GGML_API enum ggml_status ggml_backend_tensor_alloc(ggml_backend_buffer_t buffer, struct ggml_tensor * tensor, void * addr); diff --git a/ggml/include/ggml-cpu.h b/ggml/include/ggml-cpu.h index de77a875ec533..be40b100979de 100644 --- a/ggml/include/ggml-cpu.h +++ b/ggml/include/ggml-cpu.h @@ -101,6 +101,7 @@ extern "C" { GGML_BACKEND_API int ggml_cpu_has_riscv_v (void); GGML_BACKEND_API int ggml_cpu_has_vsx (void); GGML_BACKEND_API int ggml_cpu_has_vxe (void); + GGML_BACKEND_API int ggml_cpu_has_nnpa (void); GGML_BACKEND_API int ggml_cpu_has_wasm_simd (void); GGML_BACKEND_API int ggml_cpu_has_llamafile (void); @@ -133,6 +134,7 @@ extern "C" { GGML_BACKEND_API ggml_backend_reg_t ggml_backend_cpu_reg(void); + GGML_BACKEND_API void ggml_cpu_fp32_to_fp32(const float *, float *, int64_t); GGML_BACKEND_API void ggml_cpu_fp32_to_fp16(const float *, ggml_fp16_t *, int64_t); GGML_BACKEND_API void ggml_cpu_fp16_to_fp32(const ggml_fp16_t *, float *, int64_t); GGML_BACKEND_API void ggml_cpu_fp32_to_bf16(const float *, ggml_bf16_t *, int64_t); diff --git a/ggml/include/ggml-kompute.h b/ggml/include/ggml-kompute.h deleted file mode 100644 index 154aa56a742f4..0000000000000 --- a/ggml/include/ggml-kompute.h +++ /dev/null @@ 
-1,50 +0,0 @@ -#pragma once - -#include "ggml.h" -#include "ggml-backend.h" - -#include -#include -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define GGML_KOMPUTE_MAX_DEVICES 16 - -struct ggml_vk_device { - int index; - int type; // same as VkPhysicalDeviceType - size_t heapSize; - const char * name; - const char * vendor; - int subgroupSize; - uint64_t bufferAlignment; - uint64_t maxAlloc; -}; - -struct ggml_vk_device * ggml_vk_available_devices(size_t memoryRequired, size_t * count); -bool ggml_vk_get_device(struct ggml_vk_device * device, size_t memoryRequired, const char * name); -bool ggml_vk_has_vulkan(void); -bool ggml_vk_has_device(void); -struct ggml_vk_device ggml_vk_current_device(void); - -// -// backend API -// - -// forward declaration -typedef struct ggml_backend * ggml_backend_t; - -GGML_BACKEND_API ggml_backend_t ggml_backend_kompute_init(int device); - -GGML_BACKEND_API bool ggml_backend_is_kompute(ggml_backend_t backend); - -GGML_BACKEND_API ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device); - -GGML_BACKEND_API ggml_backend_reg_t ggml_backend_kompute_reg(void); - -#ifdef __cplusplus -} -#endif diff --git a/ggml/include/ggml-webgpu.h b/ggml/include/ggml-webgpu.h new file mode 100644 index 0000000000000..65b8ed9bb6644 --- /dev/null +++ b/ggml/include/ggml-webgpu.h @@ -0,0 +1,19 @@ +#pragma once + +#include "ggml.h" +#include "ggml-backend.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define GGML_WEBGPU_NAME "WebGPU" + +// Needed for examples in ggml +GGML_BACKEND_API ggml_backend_t ggml_backend_webgpu_init(void); + +GGML_BACKEND_API ggml_backend_reg_t ggml_backend_webgpu_reg(void); + +#ifdef __cplusplus +} +#endif diff --git a/ggml/include/ggml.h b/ggml/include/ggml.h index 1a57f1cd75a31..8a8775be36583 100644 --- a/ggml/include/ggml.h +++ b/ggml/include/ggml.h @@ -314,6 +314,13 @@ extern "C" { #endif + // Function type used in fatal error callbacks + typedef void (*ggml_abort_callback_t)(const char * 
error_message); + + // Set the abort callback (passing null will restore original abort functionality: printing a message to stdout) + // Returns the old callback for chaining + GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback); + GGML_NORETURN GGML_ATTRIBUTE_FORMAT(3, 4) GGML_API void ggml_abort(const char * file, int line, const char * fmt, ...); @@ -470,6 +477,7 @@ extern "C" { GGML_OP_TRANSPOSE, GGML_OP_GET_ROWS, GGML_OP_GET_ROWS_BACK, + GGML_OP_SET_ROWS, GGML_OP_DIAG, GGML_OP_DIAG_MASK_INF, GGML_OP_DIAG_MASK_ZERO, @@ -481,14 +489,16 @@ extern "C" { GGML_OP_CONV_TRANSPOSE_1D, GGML_OP_IM2COL, GGML_OP_IM2COL_BACK, + GGML_OP_CONV_2D, GGML_OP_CONV_2D_DW, GGML_OP_CONV_TRANSPOSE_2D, GGML_OP_POOL_1D, GGML_OP_POOL_2D, GGML_OP_POOL_2D_BACK, - GGML_OP_UPSCALE, // nearest interpolate + GGML_OP_UPSCALE, GGML_OP_PAD, GGML_OP_PAD_REFLECT_1D, + GGML_OP_ROLL, GGML_OP_ARANGE, GGML_OP_TIMESTEP_EMBEDDING, GGML_OP_ARGSORT, @@ -518,6 +528,8 @@ extern "C" { GGML_OP_CROSS_ENTROPY_LOSS_BACK, GGML_OP_OPT_STEP_ADAMW, + GGML_OP_GLU, + GGML_OP_COUNT, }; @@ -541,6 +553,16 @@ extern "C" { GGML_UNARY_OP_COUNT, }; + enum ggml_glu_op { + GGML_GLU_OP_REGLU, + GGML_GLU_OP_GEGLU, + GGML_GLU_OP_SWIGLU, + GGML_GLU_OP_GEGLU_ERF, + GGML_GLU_OP_GEGLU_QUICK, + + GGML_GLU_OP_COUNT, + }; + enum ggml_object_type { GGML_OBJECT_TYPE_TENSOR, GGML_OBJECT_TYPE_GRAPH, @@ -626,6 +648,9 @@ extern "C" { // misc + GGML_API const char * ggml_version(void); + GGML_API const char * ggml_commit(void); + GGML_API void ggml_time_init(void); // call this once at the beginning of the program GGML_API int64_t ggml_time_ms(void); GGML_API int64_t ggml_time_us(void); @@ -656,6 +681,7 @@ extern "C" { GGML_API const char * ggml_op_symbol(enum ggml_op op); GGML_API const char * ggml_unary_op_name(enum ggml_unary_op op); + GGML_API const char * ggml_glu_op_name(enum ggml_glu_op op); GGML_API const char * ggml_op_desc(const struct ggml_tensor * t); // unary or op name GGML_API size_t 
ggml_element_size(const struct ggml_tensor * tensor); @@ -686,6 +712,9 @@ extern "C" { // true for tensor that is stored in memory as CxWxHxN and has been permuted to WxHxCxN GGML_API bool ggml_is_contiguous_channels(const struct ggml_tensor * tensor); + // true if the elements in dimension 0 are contiguous, or there is just 1 block of elements + GGML_API bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor); + GGML_API bool ggml_are_same_shape (const struct ggml_tensor * t0, const struct ggml_tensor * t1); GGML_API bool ggml_are_same_stride(const struct ggml_tensor * t0, const struct ggml_tensor * t1); @@ -757,6 +786,7 @@ extern "C" { GGML_API void ggml_unravel_index(const struct ggml_tensor * tensor, int64_t i, int64_t * i0, int64_t * i1, int64_t * i2, int64_t * i3); GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor); + GGML_API enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor); GGML_API void * ggml_get_data (const struct ggml_tensor * tensor); GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor); @@ -1085,6 +1115,89 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + // gated linear unit ops + // A: n columns, r rows, + // result is n / 2 columns, r rows, + // expects gate in second half of row, unless swapped is true + GGML_API struct ggml_tensor * ggml_glu( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_glu_op op, + bool swapped); + + GGML_API struct ggml_tensor * ggml_reglu( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_reglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_swiglu( + struct ggml_context * ctx, + struct ggml_tensor * a); + 
+ GGML_API struct ggml_tensor * ggml_swiglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_erf( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_erf_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_quick( + struct ggml_context * ctx, + struct ggml_tensor * a); + + GGML_API struct ggml_tensor * ggml_geglu_quick_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a); + + // A: n columns, r rows, + // B: n columns, r rows, + GGML_API struct ggml_tensor * ggml_glu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_glu_op op); + + GGML_API struct ggml_tensor * ggml_reglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_geglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_swiglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_geglu_erf_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + + GGML_API struct ggml_tensor * ggml_geglu_quick_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b); + // normalize along rows GGML_API struct ggml_tensor * ggml_norm( struct ggml_context * ctx, @@ -1184,6 +1297,19 @@ extern "C" { struct ggml_tensor * a, float s); + // x = s * a + b + GGML_API struct ggml_tensor * ggml_scale_bias( + struct ggml_context * ctx, + struct ggml_tensor * a, + float s, + float b); + + GGML_API struct ggml_tensor * ggml_scale_bias_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + float s, + float b); + // b -> view(a,offset,nb1,nb2,3), return modified a GGML_API struct ggml_tensor * ggml_set( struct 
ggml_context * ctx, @@ -1374,6 +1500,23 @@ extern "C" { struct ggml_tensor * b, // row indices struct ggml_tensor * c); // data for ggml_get_rows, only used for its shape + // a TD [n_embd, ne1, ne2, ne3] + // b TS [n_embd, n_rows, ne02, ne03] | ne02 == ne2, ne03 == ne3 + // c I64 [n_rows, ne11, ne12, 1] | c[i] in [0, ne1) + // + // undefined behavior if destination rows overlap + // + // broadcast: + // ne2 % ne11 == 0 + // ne3 % ne12 == 0 + // + // return view(a) + GGML_API struct ggml_tensor * ggml_set_rows( + struct ggml_context * ctx, + struct ggml_tensor * a, // destination + struct ggml_tensor * b, // source + struct ggml_tensor * c); // row indices + GGML_API struct ggml_tensor * ggml_diag( struct ggml_context * ctx, struct ggml_tensor * a); @@ -1411,8 +1554,14 @@ extern "C" { struct ggml_context * ctx, struct ggml_tensor * a); + // a [ne0, ne01, ne02, ne03] + // mask [ne0, ne11, ne12, ne13] | ne11 >= ne01, F16 or F32, optional + // + // broadcast: + // ne02 % ne12 == 0 + // ne03 % ne13 == 0 + // // fused soft_max(a*scale + mask*(ALiBi slope)) - // mask is optional // max_bias = 0.0f for no ALiBi GGML_API struct ggml_tensor * ggml_soft_max_ext( struct ggml_context * ctx, @@ -1722,6 +1871,17 @@ extern "C" { struct ggml_tensor * b, int stride); + GGML_API struct ggml_tensor * ggml_conv_2d_direct( + struct ggml_context * ctx, + struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC] + struct ggml_tensor * b, // input data [W, H, C, N] + int s0, // stride dimension 0 + int s1, // stride dimension 1 + int p0, // padding dimension 0 + int p1, // padding dimension 1 + int d0, // dilation dimension 0 + int d1); // dilation dimension 1 + enum ggml_op_pool { GGML_OP_POOL_MAX, GGML_OP_POOL_AVG, @@ -1764,6 +1924,12 @@ extern "C" { enum ggml_scale_mode { GGML_SCALE_MODE_NEAREST = 0, GGML_SCALE_MODE_BILINEAR = 1, + + GGML_SCALE_MODE_COUNT + }; + + enum ggml_scale_flag { + GGML_SCALE_FLAG_ALIGN_CORNERS = (1 << 8) }; // interpolate @@ -1776,14 +1942,26 @@ extern "C" 
{ // interpolate // interpolate scale to specified dimensions - GGML_API struct ggml_tensor * ggml_upscale_ext( + GGML_DEPRECATED(GGML_API struct ggml_tensor * ggml_upscale_ext( struct ggml_context * ctx, struct ggml_tensor * a, int ne0, int ne1, int ne2, int ne3, - enum ggml_scale_mode mode); + enum ggml_scale_mode mode), + "use ggml_interpolate instead"); + + // Up- or downsamples the input to the specified size. + // 2D scale modes (eg. bilinear) are applied to the first two dimensions. + GGML_API struct ggml_tensor * ggml_interpolate( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode); // ggml_scale_mode [ | ggml_scale_flag...] // pad each dimension with zeros: [x, ..., x] -> [x, ..., x, 0, ..., 0] GGML_API struct ggml_tensor * ggml_pad( @@ -1801,6 +1979,17 @@ extern "C" { int p0, int p1); + // Move tensor elements by an offset given for each dimension. Elements that + // are shifted beyond the last position are wrapped around to the beginning. + GGML_API struct ggml_tensor * ggml_roll( + struct ggml_context * ctx, + struct ggml_tensor * a, + int shift0, + int shift1, + int shift2, + int shift3); + + // Ref: https://github.com/CompVis/stable-diffusion/blob/main/ldm/modules/diffusionmodules/util.py#L151 // timesteps: [N,] // return: [N, dim] @@ -1835,11 +2024,17 @@ extern "C" { #define GGML_KQ_MASK_PAD 64 - // q: [n_embd_k, n_batch, n_head, 1] - // k: [n_embd_k, n_kv, n_head_kv, 1] - // v: [n_embd_v, n_kv, n_head_kv, 1] !! not transposed !! - // mask: [n_kv, n_batch_pad, 1, 1] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !! - // res: [n_embd_v, n_head, n_batch, 1] !! permuted !! + // q: [n_embd_k, n_batch, n_head, ne3 ] + // k: [n_embd_k, n_kv, n_head_kv, ne3 ] + // v: [n_embd_v, n_kv, n_head_kv, ne3 ] !! not transposed !! + // mask: [n_kv, n_batch_pad, ne32, ne33] !! n_batch_pad = GGML_PAD(n_batch, GGML_KQ_MASK_PAD) !! + // res: [n_embd_v, n_head, n_batch, ne3 ] !! 
permuted !! + // + // broadcast: + // n_head % n_head_kv == 0 + // n_head % ne32 == 0 + // ne3 % ne33 == 0 + // GGML_API struct ggml_tensor * ggml_flash_attn_ext( struct ggml_context * ctx, struct ggml_tensor * q, @@ -1878,7 +2073,8 @@ extern "C" { struct ggml_tensor * dt, struct ggml_tensor * A, struct ggml_tensor * B, - struct ggml_tensor * C); + struct ggml_tensor * C, + struct ggml_tensor * ids); // partition into non-overlapping windows with padding if needed // example: diff --git a/ggml/src/CMakeLists.txt b/ggml/src/CMakeLists.txt index 726da5e048b18..0425fd60a9412 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml/src/CMakeLists.txt @@ -286,6 +286,10 @@ function(ggml_add_cpu_backend_variant tag_name) foreach (feat ${ARGN}) set(GGML_INTERNAL_${feat} ON) endforeach() + elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") + foreach (feat ${ARGN}) + set(GGML_INTERNAL_${feat} ON) + endforeach() endif() ggml_add_cpu_backend_variant_impl(${tag_name}) @@ -311,18 +315,45 @@ if (GGML_CPU_ALL_VARIANTS) # MSVC doesn't support AMX ggml_add_cpu_backend_variant(sapphirerapids SSE42 AVX F16C AVX2 BMI2 FMA AVX512 AVX512_VBMI AVX512_VNNI AVX512_BF16 AMX_TILE AMX_INT8) endif() - elseif(GGML_SYSTEM_ARCH STREQUAL "ARM" AND CMAKE_SYSTEM_NAME MATCHES "Linux") - # Many of these features are optional so we build versions with popular - # combinations and name the backends based on the version they were - # first released with - ggml_add_cpu_backend_variant(armv8.0_1) - ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD) - ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC) - ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE) - ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8) - ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2) - ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME) - ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD 
FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME) + elseif(GGML_SYSTEM_ARCH STREQUAL "ARM") + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + # Many of these features are optional so we build versions with popular + # combinations and name the backends based on the version they were + # first released with + ggml_add_cpu_backend_variant(armv8.0_1) + ggml_add_cpu_backend_variant(armv8.2_1 DOTPROD) + ggml_add_cpu_backend_variant(armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC) + ggml_add_cpu_backend_variant(armv8.2_3 DOTPROD FP16_VECTOR_ARITHMETIC SVE) + ggml_add_cpu_backend_variant(armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8) + ggml_add_cpu_backend_variant(armv8.6_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2) + ggml_add_cpu_backend_variant(armv9.2_1 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SME) + ggml_add_cpu_backend_variant(armv9.2_2 DOTPROD FP16_VECTOR_ARITHMETIC SVE MATMUL_INT8 SVE2 SME) + elseif (CMAKE_SYSTEM_NAME MATCHES "Android") + # Android-specific backends with SoC-compatible feature sets + ggml_add_cpu_backend_variant(android_armv8.0_1) + ggml_add_cpu_backend_variant(android_armv8.2_1 DOTPROD) + ggml_add_cpu_backend_variant(android_armv8.2_2 DOTPROD FP16_VECTOR_ARITHMETIC) + ggml_add_cpu_backend_variant(android_armv8.6_1 DOTPROD FP16_VECTOR_ARITHMETIC MATMUL_INT8) + elseif (APPLE) + ggml_add_cpu_backend_variant(apple_m1 DOTPROD) + ggml_add_cpu_backend_variant(apple_m2_m3 DOTPROD MATMUL_INT8) + ggml_add_cpu_backend_variant(apple_m4 DOTPROD MATMUL_INT8 NOSVE SME) + else() + message(FATAL_ERROR "Unsupported ARM target OS: ${CMAKE_SYSTEM_NAME}") + endif() + elseif (GGML_SYSTEM_ARCH STREQUAL "PowerPC") + if (CMAKE_SYSTEM_NAME MATCHES "Linux") + ggml_add_cpu_backend_variant(power0) + ggml_add_cpu_backend_variant(power7_1 POWER7) + ggml_add_cpu_backend_variant(power7_2 POWER7 VSX) + ggml_add_cpu_backend_variant(power8_1 POWER8) + ggml_add_cpu_backend_variant(power8_2 POWER8 VSX) + ggml_add_cpu_backend_variant(power9 POWER9 VSX) + 
ggml_add_cpu_backend_variant(power10 POWER10 VSX) + ggml_add_cpu_backend_variant(power11 POWER11 VSX) + else() + message(FATAL_ERROR "Unsupported PowerPC target OS: ${CMAKE_SYSTEM_NAME}") + endif() else() message(FATAL_ERROR "GGML_CPU_ALL_VARIANTS not yet supported with ${GGML_SYSTEM_ARCH} on ${CMAKE_SYSTEM_NAME}") endif() @@ -334,12 +365,12 @@ ggml_add_backend(BLAS) ggml_add_backend(CANN) ggml_add_backend(CUDA) ggml_add_backend(HIP) -ggml_add_backend(Kompute) ggml_add_backend(METAL) ggml_add_backend(MUSA) ggml_add_backend(RPC) ggml_add_backend(SYCL) ggml_add_backend(Vulkan) +ggml_add_backend(WebGPU) ggml_add_backend(OpenCL) foreach (target ggml-base ggml) diff --git a/ggml/src/ggml-backend-reg.cpp b/ggml/src/ggml-backend-reg.cpp index 405d8e31514b5..f0cdac31eae9a 100644 --- a/ggml/src/ggml-backend-reg.cpp +++ b/ggml/src/ggml-backend-reg.cpp @@ -45,6 +45,10 @@ #include "ggml-vulkan.h" #endif +#ifdef GGML_USE_WEBGPU +#include "ggml-webgpu.h" +#endif + #ifdef GGML_USE_OPENCL #include "ggml-opencl.h" #endif @@ -61,14 +65,13 @@ #include "ggml-cann.h" #endif -#ifdef GGML_USE_KOMPUTE -#include "ggml-kompute.h" -#endif - // disable C++17 deprecation warning for std::codecvt_utf8 #if defined(__clang__) # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wdeprecated-declarations" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif namespace fs = std::filesystem; @@ -91,6 +94,8 @@ static std::string path_str(const fs::path & path) { #if defined(__clang__) # pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop #endif #ifdef _WIN32 @@ -172,6 +177,9 @@ struct ggml_backend_registry { #ifdef GGML_USE_VULKAN register_backend(ggml_backend_vk_reg()); #endif +#ifdef GGML_USE_WEBGPU + register_backend(ggml_backend_webgpu_reg()); +#endif #ifdef GGML_USE_OPENCL register_backend(ggml_backend_opencl_reg()); #endif @@ -184,9 +192,6 @@ struct ggml_backend_registry { #ifdef 
GGML_USE_RPC register_backend(ggml_backend_rpc_reg()); #endif -#ifdef GGML_USE_KOMPUTE - register_backend(ggml_backend_kompute_reg()); -#endif #ifdef GGML_USE_CPU register_backend(ggml_backend_cpu_reg()); #endif @@ -570,7 +575,6 @@ void ggml_backend_load_all_from_path(const char * dir_path) { ggml_backend_load_best("cann", silent, dir_path); ggml_backend_load_best("cuda", silent, dir_path); ggml_backend_load_best("hip", silent, dir_path); - ggml_backend_load_best("kompute", silent, dir_path); ggml_backend_load_best("metal", silent, dir_path); ggml_backend_load_best("rpc", silent, dir_path); ggml_backend_load_best("sycl", silent, dir_path); diff --git a/ggml/src/ggml-backend.cpp b/ggml/src/ggml-backend.cpp index b1050ad59c26a..788861a365fab 100644 --- a/ggml/src/ggml-backend.cpp +++ b/ggml/src/ggml-backend.cpp @@ -817,8 +817,9 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str } if (sched->debug > 1) { ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node); - GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name, - fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node)); + GGML_LOG_DEBUG("node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s] use=%d:", i, ggml_op_name(node->op), node->name, + fmt_size(ggml_nbytes(node)), tensor_backend ? 
ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node), + graph->use_counts[ggml_hash_find(&graph->visited_hash_set, node)]); for (int j = 0; j < GGML_MAX_SRC; j++) { struct ggml_tensor * src = node->src[j]; if (src == NULL) { @@ -1826,7 +1827,7 @@ void ggml_backend_graph_copy_free(struct ggml_backend_graph_copy copy) { ggml_free(copy.ctx_unallocated); } -bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data) { +bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t backend2, struct ggml_cgraph * graph, ggml_backend_eval_callback callback, void * user_data, struct ggml_tensor * test_node) { struct ggml_backend_graph_copy copy = ggml_backend_graph_copy(backend2, graph); if (copy.buffer == NULL) { return false; @@ -1837,28 +1838,45 @@ bool ggml_backend_compare_graph_backend(ggml_backend_t backend1, ggml_backend_t assert(g1->n_nodes == g2->n_nodes); - for (int i = 0; i < g1->n_nodes; i++) { - struct ggml_tensor * t1 = g1->nodes[i]; - struct ggml_tensor * t2 = g2->nodes[i]; + if (test_node != nullptr) { + // Compute the whole graph and only test the output for a specific tensor + ggml_backend_graph_compute(backend1, g1); + ggml_backend_graph_compute(backend2, g2); - assert(t1->op == t2->op && ggml_are_same_layout(t1, t2)); + int test_node_idx = -1; + for (int i = 0; i < g1->n_nodes; i++) { + struct ggml_tensor * t1 = g1->nodes[i]; + if (t1 == test_node) { + test_node_idx = i; + break; + } + } + GGML_ASSERT(test_node_idx != -1); - struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1); - struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1); + callback(test_node_idx, g1->nodes[test_node_idx], g2->nodes[test_node_idx], user_data); + } else { + for (int i = 0; i < g1->n_nodes; i++) { + struct ggml_tensor * t1 = g1->nodes[i]; + struct ggml_tensor * t2 = g2->nodes[i]; - ggml_backend_graph_compute(backend1, &g1v); - 
ggml_backend_graph_compute(backend2, &g2v); + assert(t1->op == t2->op && ggml_are_same_layout(t1, t2)); - if (ggml_is_view_op(t1->op)) { - continue; - } + struct ggml_cgraph g1v = ggml_graph_view(g1, i, i + 1); + struct ggml_cgraph g2v = ggml_graph_view(g2, i, i + 1); - // compare results, calculate rms etc - if (!callback(i, t1, t2, user_data)) { - break; + ggml_backend_graph_compute(backend1, &g1v); + ggml_backend_graph_compute(backend2, &g2v); + + if (ggml_is_view_op(t1->op)) { + continue; + } + + // compare results, calculate rms etc + if (!callback(i, t1, t2, user_data)) { + break; + } } } - ggml_backend_graph_copy_free(copy); return true; diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml/src/ggml-cann/aclnn_ops.cpp index 437ece2d4a3cf..4d5c2c182521f 100755 --- a/ggml/src/ggml-cann/aclnn_ops.cpp +++ b/ggml/src/ggml-cann/aclnn_ops.cpp @@ -65,8 +65,9 @@ #include #include #include -#include +#include #include +#include #include #include @@ -804,10 +805,11 @@ static aclTensor* aclnn_zero(ggml_backend_cann_context& ctx, void* buffer, nb[i] = nb[i - 1] * ne[i - 1]; } - ggml_cann_async_memset(ctx, buffer, n_bytes, 0); aclTensor* zero = ggml_cann_create_tensor(buffer, type, type_size, ne, nb, dims); + GGML_CANN_CALL_ACLNN_OP(ctx, InplaceZero, zero); return zero; + GGML_UNUSED(n_bytes); } /** @@ -2654,6 +2656,67 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor* memcpy(ori_src0_nb, cast_nb, sizeof(ori_src0_nb)); } +#ifdef ASCEND_310P + ggml_tensor src0_row = *src0; + ggml_tensor src1_row = *src1; + ggml_tensor dst_row = *dst; + + if (src0->type == GGML_TYPE_F16) { + src0_row.type = GGML_TYPE_F32; + } + + // src0_row [D, M, 1, 1] weight without permute + src0_row.ne[2] = 1; + src0_row.ne[3] = 1; + src0_row.nb[0] = ori_src0_nb[0]; + src0_row.nb[1] = ori_src0_nb[1]; + src0_row.nb[2] = ori_src0_nb[1]; + src0_row.nb[3] = ori_src0_nb[1]; + + // src1_row [D, 1, 1, 1] -> input + src1_row.ne[1] = 1; + src1_row.ne[2] = 1; + src1_row.ne[3] = 1; 
+ src1_row.nb[2] = nb11; + src1_row.nb[3] = nb11; + + // dst_row [M, 1, 1, 1] -> out + dst_row.ne[1] = 1; + dst_row.ne[2] = 1; + dst_row.ne[3] = 1; + dst_row.nb[2] = nb1; + dst_row.nb[3] = nb1; + + //create weight for one row + for (int64_t iid1 = 0; iid1 < ids->ne[1]; iid1++) { + for (int64_t id = 0; id < n_ids; id++) { + // expert index + int32_t i02 = *(int32_t *) (ids_host.data() + iid1*ids->nb[1] + id*ids->nb[0]); + GGML_ASSERT(i02 >= 0 && i02 < n_as); + + // If B = 1 (broadcast), always use 0; otherwise, use id. + int64_t i11 = (ne11 == 1 ? 0 : id); + int64_t i12 = iid1; + + int64_t i1 = id; + int64_t i2 = i12; + + void* src0_tmp_ptr = src0_original + i02*ori_src0_nb[2]; + void* src1_tmp_ptr = src1_original + i11*nb11 + i12*nb12; + void* dst_tmp_ptr = dst_original + i1*nb1 + i2*nb2; + + src0_row.data = src0_tmp_ptr; + src1_row.data = src1_tmp_ptr; + dst_row.data = dst_tmp_ptr; + dst_row.src[0] = &src0_row; + dst_row.src[1] = &src1_row; + + ggml_cann_mul_mat(ctx, &dst_row); + } + } + return; +#endif + std::vector src0_tensor_vec; std::vector src1_tensor_vec; std::vector dst_tensor_vec; @@ -2701,9 +2764,9 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor* } size_t GROUP_SIZE = 128; - // GroupedMatmulV2 required tensor_list.size < 128 + // GroupedMatmulV3 required tensor_list.size < 128 for (size_t i = 0; i < src0_tensor_vec.size(); i += GROUP_SIZE) { - // split and call GroupedMatmulV2 + // split and call GroupedMatmulV3 size_t end = std::min(i + GROUP_SIZE, src0_tensor_vec.size()); std::vector src0_tensor_vec_split(src0_tensor_vec.begin() + i, src0_tensor_vec.begin() + end); std::vector src1_tensor_vec_split(src1_tensor_vec.begin() + i, src1_tensor_vec.begin() + end); @@ -2713,7 +2776,7 @@ static void ggml_cann_mul_mat_id_fp(ggml_backend_cann_context& ctx, ggml_tensor* aclTensorList* src1_tensor_list = aclCreateTensorList(src1_tensor_vec_split.data(), src1_tensor_vec_split.size()); aclTensorList* dst_tensor_list = 
aclCreateTensorList(dst_tensor_vec_split.data(), dst_tensor_vec_split.size()); - GGML_CANN_CALL_ACLNN_OP(ctx, GroupedMatmulV2, src1_tensor_list, src0_tensor_list, + GGML_CANN_CALL_ACLNN_OP(ctx, GroupedMatmulV3, src1_tensor_list, src0_tensor_list, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, 0, -1, dst_tensor_list); ggml_cann_release_resources(ctx, src0_tensor_list, src1_tensor_list, dst_tensor_list); diff --git a/ggml/src/ggml-cann/common.h b/ggml/src/ggml-cann/common.h index ba2cef0c25fb2..8dfe3b061c13c 100755 --- a/ggml/src/ggml-cann/common.h +++ b/ggml/src/ggml-cann/common.h @@ -359,7 +359,7 @@ struct ggml_backend_cann_context { ggml_cann_set_device(device); description = aclrtGetSocName(); - bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or("")); + async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or("")); GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, device, async_mode ? "ON" : "OFF"); } diff --git a/ggml/src/ggml-cann/ggml-cann.cpp b/ggml/src/ggml-cann/ggml-cann.cpp index d1a0ad374d691..e5e11d4cdced9 100755 --- a/ggml/src/ggml-cann/ggml-cann.cpp +++ b/ggml/src/ggml-cann/ggml-cann.cpp @@ -2086,6 +2086,13 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, return false; } } break; + case GGML_OP_SET_ROWS: + { + // TODO: add support + // ref: https://github.com/ggml-org/llama.cpp/pull/14274 +#pragma message("TODO: implement F32, F16, BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)") + return false; + } break; case GGML_OP_CPY: { ggml_tensor *src = op->src[0]; if ((op->type != GGML_TYPE_F32 && op->type != GGML_TYPE_F16) || @@ -2182,12 +2189,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, case GGML_OP_MUL: case GGML_OP_DIV: case GGML_OP_RMS_NORM: - case GGML_OP_SCALE: case GGML_OP_SQR: case GGML_OP_SQRT: case GGML_OP_CLAMP: case GGML_OP_DIAG_MASK_INF: - case GGML_OP_SOFT_MAX: case GGML_OP_SUM_ROWS: 
case GGML_OP_ARGSORT: case GGML_OP_ACC: @@ -2205,6 +2210,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, case GGML_OP_PAD_REFLECT_1D: case GGML_OP_COUNT_EQUAL: return true; + case GGML_OP_SCALE: + float bias; + memcpy(&bias, (float*)op->op_params + 1, sizeof(float)); + return bias == 0.0f; // TODO: support bias != 0.0f + case GGML_OP_SOFT_MAX: + // TODO: support broadcast + // ref: https://github.com/ggml-org/llama.cpp/pull/14435 + return !op->src[1] || (op->src[1]->ne[2] == 1 && op->src[1]->ne[3] == 1); case GGML_OP_FLASH_ATTN_EXT:{ // derived from [ggml-cuda.cu] if(op->src[1]->type != GGML_TYPE_F16 || op->src[2]->type != GGML_TYPE_F16){ @@ -2227,6 +2240,8 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, // DeepSeek MLA return false; } + // TODO: support broadcast + // ref: https://github.com/ggml-org/llama.cpp/pull/14435 if (op->src[0]->ne[3] != 1) { return false; } diff --git a/ggml/src/ggml-cpu/CMakeLists.txt b/ggml/src/ggml-cpu/CMakeLists.txt index e4c0fa8d0240c..66a5ad8d2eddc 100644 --- a/ggml/src/ggml-cpu/CMakeLists.txt +++ b/ggml/src/ggml-cpu/CMakeLists.txt @@ -5,7 +5,7 @@ function(ggml_add_cpu_backend_features cpu_name arch) # build, using set_source_files_properties() to set the arch flags is not possible set(GGML_CPU_FEATS_NAME ${cpu_name}-feats) add_library(${GGML_CPU_FEATS_NAME} OBJECT ggml-cpu/arch/${arch}/cpu-feats.cpp) - target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . .. ../include) + target_include_directories(${GGML_CPU_FEATS_NAME} PRIVATE . 
../include) target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE ${ARGN}) target_compile_definitions(${GGML_CPU_FEATS_NAME} PRIVATE GGML_BACKEND_DL GGML_BACKEND_BUILD GGML_BACKEND_SHARED) set_target_properties(${GGML_CPU_FEATS_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON) @@ -158,48 +158,48 @@ function(ggml_add_cpu_backend_variant_impl tag_name) if (GGML_CPU_ARM_ARCH) list(APPEND ARCH_FLAGS -march=${GGML_CPU_ARM_ARCH}) elseif(GGML_CPU_ALL_VARIANTS) - if (CMAKE_SYSTEM_NAME MATCHES "Linux") - # Begin with the lowest baseline - set(ARM_MCPU "armv8-a") - set(ARCH_TAGS "") - set(ARCH_DEFINITIONS "") - - # When a feature is selected, bump the MCPU to the first - # version that supported it - if (GGML_INTERNAL_DOTPROD) - set(ARM_MCPU "armv8.2-a") - set(ARCH_TAGS "${ARCH_TAGS}+dotprod") - list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD) - endif() - if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC) - set(ARM_MCPU "armv8.2-a") - set(ARCH_TAGS "${ARCH_TAGS}+fp16") - list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC) - endif() - if (GGML_INTERNAL_SVE) - set(ARM_MCPU "armv8.2-a") - set(ARCH_TAGS "${ARCH_TAGS}+sve") - list(APPEND ARCH_DEFINITIONS GGML_USE_SVE) - endif() - if (GGML_INTERNAL_MATMUL_INT8) - set(ARM_MCPU "armv8.6-a") - set(ARCH_TAGS "${ARCH_TAGS}+i8mm") - list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8) - endif() - if (GGML_INTERNAL_SVE2) - set(ARM_MCPU "armv8.6-a") - set(ARCH_TAGS "${ARCH_TAGS}+sve2") - list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2) - endif() - if (GGML_INTERNAL_SME) - set(ARM_MCPU "armv9.2-a") - set(ARCH_TAGS "${ARCH_TAGS}+sme") - list(APPEND ARCH_DEFINITIONS GGML_USE_SME) - endif() - - list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}") - ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS}) + # Begin with the lowest baseline + set(ARM_MCPU "armv8-a") + set(ARCH_TAGS "") + set(ARCH_DEFINITIONS "") + + # When a feature is selected, bump the MCPU to the first + # version that supported it + if (GGML_INTERNAL_DOTPROD) + 
set(ARM_MCPU "armv8.2-a") + set(ARCH_TAGS "${ARCH_TAGS}+dotprod") + list(APPEND ARCH_DEFINITIONS GGML_USE_DOTPROD) + endif() + if (GGML_INTERNAL_FP16_VECTOR_ARITHMETIC) + set(ARM_MCPU "armv8.2-a") + set(ARCH_TAGS "${ARCH_TAGS}+fp16") + list(APPEND ARCH_DEFINITIONS GGML_USE_FP16_VECTOR_ARITHMETIC) + endif() + if (GGML_INTERNAL_SVE) + set(ARM_MCPU "armv8.2-a") + set(ARCH_TAGS "${ARCH_TAGS}+sve") + list(APPEND ARCH_DEFINITIONS GGML_USE_SVE) + endif() + if (GGML_INTERNAL_MATMUL_INT8) + set(ARM_MCPU "armv8.6-a") + set(ARCH_TAGS "${ARCH_TAGS}+i8mm") + list(APPEND ARCH_DEFINITIONS GGML_USE_MATMUL_INT8) + endif() + if (GGML_INTERNAL_SVE2) + set(ARM_MCPU "armv8.6-a") + set(ARCH_TAGS "${ARCH_TAGS}+sve2") + list(APPEND ARCH_DEFINITIONS GGML_USE_SVE2) endif() + if (GGML_INTERNAL_NOSVE) + set(ARCH_TAGS "${ARCH_TAGS}+nosve") + endif() + if (GGML_INTERNAL_SME) + set(ARM_MCPU "armv9.2-a") + set(ARCH_TAGS "${ARCH_TAGS}+sme") + list(APPEND ARCH_DEFINITIONS GGML_USE_SME) + endif() + list(APPEND ARCH_FLAGS "-march=${ARM_MCPU}${ARCH_TAGS}") + ggml_add_cpu_backend_features(${GGML_CPU_NAME} arm ${ARCH_DEFINITIONS}) endif() endif() @@ -388,6 +388,27 @@ function(ggml_add_cpu_backend_variant_impl tag_name) else() list(APPEND ARCH_FLAGS -mcpu=native -mtune=native -mpowerpc64) endif() + elseif(GGML_CPU_ALL_VARIANTS) + # Begin with the lowest baseline + set(ARCH_DEFINITIONS "") + + # When a feature is selected, bump the MCPU to the first + # version that supported it + foreach(PVER RANGE 7 11) + if(DEFINED GGML_INTERNAL_POWER${PVER}) + set(POWERPC_MCPU "power${PVER}") + list(APPEND ARCH_DEFINITIONS GGML_USE_POWER${PVER}) + endif() + endforeach() + if (GGML_INTERNAL_VSX) + list(APPEND ARCH_DEFINITIONS GGML_USE_VSX) + list(APPEND ARCH_FLAGS -mvsx) + endif() + + if (DEFINED POWERPC_MCPU) + list(APPEND ARCH_FLAGS -mcpu=${POWERPC_MCPU}) + endif() + ggml_add_cpu_backend_features(${GGML_CPU_NAME} powerpc ${ARCH_DEFINITIONS}) else() if (GGML_CPU_POWERPC_CPUTYPE) list(APPEND ARCH_FLAGS 
-mcpu=${GGML_CPU_POWERPC_CPUTYPE}) @@ -427,6 +448,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name) # TODO: Separation to determine activation of VX/VXE/VXE2 if (${S390X_M} MATCHES "8561|8562") + set(GGML_NNPA OFF) message(STATUS "z15 target") list(APPEND ARCH_FLAGS -march=z15) elseif (${S390X_M} MATCHES "3931") @@ -443,7 +465,14 @@ function(ggml_add_cpu_backend_variant_impl tag_name) endif() if (GGML_VXE) + message(STATUS "VX/VXE/VXE2 enabled") list(APPEND ARCH_FLAGS -mvx -mzvector) + list(APPEND ARCH_DEFINITIONS GGML_VXE) + endif() + + if (GGML_NNPA) + message(STATUS "NNPA enabled") + list(APPEND ARCH_DEFINITIONS GGML_NNPA) endif() elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "wasm") message(STATUS "Wasm detected") @@ -465,9 +494,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name) # Fetch KleidiAI sources: include(FetchContent) - set(KLEIDIAI_COMMIT_TAG "v1.6.0") + set(KLEIDIAI_COMMIT_TAG "v1.9.0") set(KLEIDIAI_DOWNLOAD_URL "https://github.com/ARM-software/kleidiai/archive/refs/tags/${KLEIDIAI_COMMIT_TAG}.tar.gz") - set(KLEIDIAI_ARCHIVE_MD5 "75b4ad68f25ab673dcc01065e5a0b05f") + set(KLEIDIAI_ARCHIVE_MD5 "2a8e1bb55d201557553545536489a017") if (POLICY CMP0135) cmake_policy(SET CMP0135 NEW) @@ -560,4 +589,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name) if (EMSCRIPTEN) set_target_properties(${GGML_CPU_NAME} PROPERTIES COMPILE_FLAGS "-msimd128") endif() + + if (CMAKE_CXX_COMPILER_ID STREQUAL "IntelLLVM") + # The compiler automatically enables "-ffast-math" which can cause NaNs in tests due to "-fassociative-math" + target_compile_options(${GGML_CPU_NAME} PRIVATE "-fno-associative-math") + endif() endfunction() diff --git a/ggml/src/ggml-cpu/amx/mmq.cpp b/ggml/src/ggml-cpu/amx/mmq.cpp index cec34eb6416ac..47c61b88164b8 100644 --- a/ggml/src/ggml-cpu/amx/mmq.cpp +++ b/ggml/src/ggml-cpu/amx/mmq.cpp @@ -8,6 +8,7 @@ #include "mmq.h" #include "ggml-impl.h" #include "ggml-cpu-impl.h" +#include "simd-mappings.h" #include "quants.h" #include "ggml-quants.h" 
#include @@ -453,7 +454,7 @@ void quantize_row_q8_K_vnni(const float * RESTRICT x, void * RESTRICT vy, int64_ // Quantize these floats const float iscale = 127.f / amax; - y[i].d = GGML_FP32_TO_FP16(1 / iscale); + y[i].d = GGML_CPU_FP32_TO_FP16(1 / iscale); const float id = ( amax != 0.0f ) ? iscale : 0.f; const __m512 vscale = _mm512_set1_ps(id); @@ -1090,7 +1091,7 @@ struct acc_C { const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset))); for (int m = 0; m < nr; ++m) { - const __m512 vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].d)); + const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d)); const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N)); __m512 vsum; @@ -1113,8 +1114,8 @@ struct acc_C { const __m512 vm0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset + TILE_N * sizeof(ggml_half)))); for (int m = 0; m < nr; ++m) { - const __m512 vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].d)); - const __m512 vs1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].s)); + const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d)); + const __m512 vs1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].s)); const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N)); __m512 vsum; @@ -1137,7 +1138,7 @@ struct acc_C { const __m512 vd0 = _mm512_cvtph_ps(_mm256_loadu_si256((const __m256i *)((const char *)packed_B + offset))); for (int m = 0; m < nr; ++m) { - const __m512 vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[m * lda].d)); + const __m512 vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[m * lda].d)); const __m512 vtile = _mm512_cvtepi32_ps(_mm512_loadu_si512(tile + m * TILE_N)); __m512 vsum; @@ -1437,7 +1438,7 @@ struct tinygemm_kernel_vnni for (int k = 0; k < 8; ++k) { va[k] = _mm512_set1_epi32(a_ptr[k]); } - vd1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[0 * KB + i].d)); - vs1 = _mm512_set1_ps(GGML_FP16_TO_FP32(A[0 * KB + 
i].s)); + vd1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].d)); + vs1 = _mm512_set1_ps(GGML_CPU_FP16_TO_FP32(A[0 * KB + i].s)); } // load b @@ -1571,7 +1572,7 @@ struct tinygemm_kernel_vnniqs + 16); float32_t _scale[4] = { - GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d), - GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d), - GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d), - GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d) + GGML_CPU_FP16_TO_FP32(b_x0->d)*GGML_CPU_FP16_TO_FP32(b_y0->d), + GGML_CPU_FP16_TO_FP32(b_x0->d)*GGML_CPU_FP16_TO_FP32(b_y1->d), + GGML_CPU_FP16_TO_FP32(b_x1->d)*GGML_CPU_FP16_TO_FP32(b_y0->d), + GGML_CPU_FP16_TO_FP32(b_x1->d)*GGML_CPU_FP16_TO_FP32(b_y1->d) }; float32x4_t scale = vld1q_f32(_scale); @@ -274,10 +275,10 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // dot product sumv0 = svmla_n_f32_x(ph4, sumv0, svcvt_f32_s32_x(ph4, svadd_x(ph4, svdot_s32(svdup_n_s32(0), qx0ls, qy0l), - svdot_s32(svdup_n_s32(0), qx0hs, qy0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + svdot_s32(svdup_n_s32(0), qx0hs, qy0h))), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = svmla_n_f32_x(ph4, sumv1, svcvt_f32_s32_x(ph4, svadd_x(ph4, svdot_s32(svdup_n_s32(0), qx1ls, qy1l), - svdot_s32(svdup_n_s32(0), qx1hs, qy1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + svdot_s32(svdup_n_s32(0), qx1hs, qy1h))), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = svaddv_f32(svptrue_b32(), svadd_f32_x(svptrue_b32(), sumv0, sumv1)); @@ -313,9 +314,9 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // dot product sumv0 = svmla_n_f32_x(svptrue_b32(), sumv0, svcvt_f32_s32_x(svptrue_b32(), - svdot_s32(svdup_n_s32(0), qx0s, qy0)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + svdot_s32(svdup_n_s32(0), qx0s, qy0)), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = svmla_n_f32_x(svptrue_b32(), sumv1, 
svcvt_f32_s32_x(svptrue_b32(), - svdot_s32(svdup_n_s32(0), qx1s, qy1)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + svdot_s32(svdup_n_s32(0), qx1s, qy1)), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = svaddv_f32(svptrue_b32(), svadd_f32_x(svptrue_b32(), sumv0, sumv1)); @@ -354,9 +355,9 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // dot product sumv0 = svmla_n_f32_x(ph32, sumv0, svcvt_f32_s32_x(ph32, - svdot_s32(svdup_n_s32(0), qx0s, qy0)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + svdot_s32(svdup_n_s32(0), qx0s, qy0)), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = svmla_n_f32_x(ph32, sumv1, svcvt_f32_s32_x(ph32, - svdot_s32(svdup_n_s32(0), qx1s, qy1)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + svdot_s32(svdup_n_s32(0), qx1s, qy1)), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = svaddv_f32(ph32, svadd_f32_x(ph32, sumv0, sumv1)); @@ -404,8 +405,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi const int32x4_t p_0 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_0ls, v1_0l), v0_0hs, v1_0h); const int32x4_t p_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_1ls, v1_1l), v0_1hs, v1_1h); - sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); - sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); @@ -423,7 +424,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += 
sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; @@ -464,10 +465,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi const block_q8_1 * GGML_RESTRICT b_y1 = &vy1[i]; float32_t summs_t[4] = { - GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s), - GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y0->s), - GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y1->s), - GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y1->s) + GGML_CPU_FP16_TO_FP32(b_x0->m) * GGML_CPU_FP16_TO_FP32(b_y0->s), + GGML_CPU_FP16_TO_FP32(b_x1->m) * GGML_CPU_FP16_TO_FP32(b_y0->s), + GGML_CPU_FP16_TO_FP32(b_x0->m) * GGML_CPU_FP16_TO_FP32(b_y1->s), + GGML_CPU_FP16_TO_FP32(b_x1->m) * GGML_CPU_FP16_TO_FP32(b_y1->s) }; summs0 = vaddq_f32(summs0, vld1q_f32(summs_t)); @@ -490,10 +491,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi // mmla into int32x4_t float32_t _scale[4] = { - GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d), - GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d), - GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d), - GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d) + GGML_CPU_FP16_TO_FP32(b_x0->d)*GGML_CPU_FP16_TO_FP32(b_y0->d), + GGML_CPU_FP16_TO_FP32(b_x0->d)*GGML_CPU_FP16_TO_FP32(b_y1->d), + GGML_CPU_FP16_TO_FP32(b_x1->d)*GGML_CPU_FP16_TO_FP32(b_y0->d), + GGML_CPU_FP16_TO_FP32(b_x1->d)*GGML_CPU_FP16_TO_FP32(b_y1->d) }; float32x4_t scale = vld1q_f32(_scale); @@ -539,7 +540,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi const block_q8_1 * GGML_RESTRICT y0 = &y[ib + 0]; const block_q8_1 * GGML_RESTRICT y1 = &y[ib + 1]; - summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s) + GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s); + summs += GGML_CPU_FP16_TO_FP32(x0->m) * GGML_CPU_FP16_TO_FP32(y0->s) + GGML_CPU_FP16_TO_FP32(x1->m) * GGML_CPU_FP16_TO_FP32(y1->s); const uint8x16_t m4b = vdupq_n_u8(0x0F); @@ -562,8 +563,8 
@@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi const int32x4_t p_0 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l), v0_0h, v1_0h); const int32x4_t p_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l), v0_1h, v1_1h); - sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); - sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); + sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs; @@ -582,7 +583,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -666,10 +667,10 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l), - ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l), - ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); @@ -694,7 +695,7 @@ void ggml_vec_dot_q5_0_q8_0(int 
n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; @@ -739,8 +740,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi const uint8x16_t m4b = vdupq_n_u8(0x0F); - summs0 += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s); - summs1 += GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s); + summs0 += GGML_CPU_FP16_TO_FP32(x0->m) * GGML_CPU_FP16_TO_FP32(y0->s); + summs1 += GGML_CPU_FP16_TO_FP32(x1->m) * GGML_CPU_FP16_TO_FP32(y1->s); // extract the 5th bit via lookup table ((b) << 4) memcpy(&qh0, x0->qh, sizeof(qh0)); @@ -784,10 +785,10 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l), - ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l), - ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1; @@ -812,7 +813,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -864,10 +865,10 @@ void 
ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi const int8x16_t y1_h = vld1q_s8(b_y1->qs + 16); float32_t _scale[4] = { - GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y0->d), - GGML_FP16_TO_FP32(b_x0->d)*GGML_FP16_TO_FP32(b_y1->d), - GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y0->d), - GGML_FP16_TO_FP32(b_x1->d)*GGML_FP16_TO_FP32(b_y1->d) + GGML_CPU_FP16_TO_FP32(b_x0->d)*GGML_CPU_FP16_TO_FP32(b_y0->d), + GGML_CPU_FP16_TO_FP32(b_x0->d)*GGML_CPU_FP16_TO_FP32(b_y1->d), + GGML_CPU_FP16_TO_FP32(b_x1->d)*GGML_CPU_FP16_TO_FP32(b_y0->d), + GGML_CPU_FP16_TO_FP32(b_x1->d)*GGML_CPU_FP16_TO_FP32(b_y1->d) }; float32x4_t scale = vld1q_f32(_scale); @@ -934,10 +935,10 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumv0 = svmla_n_f32_x(pl16, sumv0, svcvt_f32_s32_x(pl16, svadd_x(pl16, svdot_s32(svdup_n_s32(0), qx0_0, qy0_0), - svdot_s32(svdup_n_s32(0), qx0_1, qy0_1))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + svdot_s32(svdup_n_s32(0), qx0_1, qy0_1))), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = svmla_n_f32_x(pl16, sumv1, svcvt_f32_s32_x(pl16, svadd_x(pl16, svdot_s32(svdup_n_s32(0), qx1_0, qy1_0), - svdot_s32(svdup_n_s32(0), qx1_1, qy1_1))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + svdot_s32(svdup_n_s32(0), qx1_1, qy1_1))), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = svaddv_f32(pl16, svadd_f32_x(pl16, sumv0, sumv1)); @@ -960,9 +961,9 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi const svint8_t qy1 = svld1_s8(svptrue_b8(), y1->qs); sumv0 = svmla_n_f32_x(svptrue_b32(), sumv0, svcvt_f32_s32_x(svptrue_b32(), - svdot_s32(svdup_n_s32(0), qx0, qy0)), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + svdot_s32(svdup_n_s32(0), qx0, qy0)), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = svmla_n_f32_x(svptrue_b32(), sumv1, svcvt_f32_s32_x(svptrue_b32(), - svdot_s32(svdup_n_s32(0), qx1, 
qy1)), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + svdot_s32(svdup_n_s32(0), qx1, qy1)), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = svaddv_f32(svptrue_b32(), svadd_f32_x(svptrue_b32(), sumv0, sumv1)); @@ -1002,8 +1003,8 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi qy_64 = svadd_s8_x(svptrue_b8(), qy_32, qy_64); // scale creation - const float32_t deq1 = GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d); - const float32_t deq2 = GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d); + const float32_t deq1 = GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d); + const float32_t deq2 = GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d); // duplicate deq1 in first half of vector and deq2 in second half of vector const svfloat32_t temp = svdup_f32_m(svdup_f32_z(ph8, deq1), pl8, deq2); @@ -1043,11 +1044,11 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), x0_0, y0_0), - ggml_vdotq_s32(vdupq_n_s32(0), x0_1, y0_1))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d)); + ggml_vdotq_s32(vdupq_n_s32(0), x0_1, y0_1))), GGML_CPU_FP16_TO_FP32(x0->d)*GGML_CPU_FP16_TO_FP32(y0->d)); sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32( ggml_vdotq_s32(vdupq_n_s32(0), x1_0, y1_0), - ggml_vdotq_s32(vdupq_n_s32(0), x1_1, y1_1))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d)); + ggml_vdotq_s32(vdupq_n_s32(0), x1_1, y1_1))), GGML_CPU_FP16_TO_FP32(x1->d)*GGML_CPU_FP16_TO_FP32(y1->d)); } sumf = vaddvq_f32(sumv0) + vaddvq_f32(sumv1); @@ -1059,7 +1060,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; @@ -1217,7 +1218,7 @@ void ggml_vec_dot_tq1_0_q8_K(int n, 
float * GGML_RESTRICT s, size_t bs, const vo const int16x8_t ysum0 = vld1q_s16(y[i].bsums); const int16x8_t ysum1 = vld1q_s16(y[i].bsums + 8); - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; #if defined(__ARM_FEATURE_DOTPROD) sumi0 = vaddq_s32(sumi0, sumi1); @@ -1269,7 +1270,7 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo } } - sumf += (float) sum * (GGML_FP16_TO_FP32(x[i].d) * y[i].d); + sumf += (float) sum * (GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d); } *s = sumf; @@ -1362,7 +1363,7 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo const int16x8_t ysum0 = vld1q_s16(y[i].bsums); const int16x8_t ysum1 = vld1q_s16(y[i].bsums + 8); - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; #if defined(__ARM_FEATURE_DOTPROD) sumi0 = vaddq_s32(sumi0, sumi1); @@ -1393,7 +1394,7 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo } } - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); sumf += (float) sumi * d; } @@ -1425,9 +1426,9 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi switch (vector_length) { case 128: for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); svfloat32_t d_broad = svdup_n_f32((float32_t)d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); svfloat32_t dmin_broad = svdup_n_f32((float32_t)dmin); const uint8_t * GGML_RESTRICT q2 = x[i].qs; @@ -1570,9 +1571,9 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi case 256: case 512: for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * 
GGML_CPU_FP16_TO_FP32(x[i].d); svfloat32_t d_broad = svdup_n_f32((float32_t)d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); svfloat32_t dmin_broad = svdup_n_f32((float32_t)dmin); const uint8_t * GGML_RESTRICT q2 = x[i].qs; @@ -1671,8 +1672,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sum = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const uint8_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1742,8 +1743,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi summs += y[i].bsums[j] * (sc[j] >> 4); } - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int isum = 0; int is = 0; @@ -1805,7 +1806,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q3_sv = x[i].qs; const uint8_t * GGML_RESTRICT qh_sv = x[i].hmask; @@ -1981,7 +1982,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].hmask; @@ -2112,7 +2113,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; 
++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -2258,18 +2259,18 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi bias[3] = vaddvq_s32(vaddq_s32(vmull_s16(vget_low_s16(y1_sums), vget_low_s16(x1_mins)), vmull_s16(vget_high_s16(y1_sums), vget_high_s16(x1_mins)))); const float32x4_t dmins = { - GGML_FP16_TO_FP32(x0->dmin) * y0->d, - GGML_FP16_TO_FP32(x0->dmin) * y1->d, - GGML_FP16_TO_FP32(x1->dmin) * y0->d, - GGML_FP16_TO_FP32(x1->dmin) * y1->d, + GGML_CPU_FP16_TO_FP32(x0->dmin) * y0->d, + GGML_CPU_FP16_TO_FP32(x0->dmin) * y1->d, + GGML_CPU_FP16_TO_FP32(x1->dmin) * y0->d, + GGML_CPU_FP16_TO_FP32(x1->dmin) * y1->d, }; vfsum = vmlsq_f32(vfsum, vcvtq_f32_s32(vld1q_s32(bias)), dmins); const float32x4_t superblock_scale = { - GGML_FP16_TO_FP32(x0->d) * y0->d, - GGML_FP16_TO_FP32(x0->d) * y1->d, - GGML_FP16_TO_FP32(x1->d) * y0->d, - GGML_FP16_TO_FP32(x1->d) * y1->d, + GGML_CPU_FP16_TO_FP32(x0->d) * y0->d, + GGML_CPU_FP16_TO_FP32(x0->d) * y1->d, + GGML_CPU_FP16_TO_FP32(x1->d) * y0->d, + GGML_CPU_FP16_TO_FP32(x1->d) * y1->d, }; vfsum = vmlaq_f32(vfsum, vcvtq_f32_s32(visum), superblock_scale); } @@ -2289,8 +2290,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const int16x8_t q8sums = vpaddq_s16(vld1q_s16(y[i].bsums), vld1q_s16(y[i].bsums + 8)); @@ -2377,8 +2378,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = 
y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const int16x8_t q8sums = vpaddq_s16(vld1q_s16(y[i].bsums), vld1q_s16(y[i].bsums + 8)); @@ -2478,9 +2479,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -2520,8 +2521,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const int16x8_t q8sums = vpaddq_s16(vld1q_s16(y[i].bsums), vld1q_s16(y[i].bsums + 8)); @@ -2630,9 +2631,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -2827,10 +2828,10 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi const int32x4_t vibias = vmulq_n_s32(vld1q_s32(bias), 32); 
const float32x4_t superblock_scale = { - GGML_FP16_TO_FP32(x0->d) * y0->d, - GGML_FP16_TO_FP32(x0->d) * y1->d, - GGML_FP16_TO_FP32(x1->d) * y0->d, - GGML_FP16_TO_FP32(x1->d) * y1->d, + GGML_CPU_FP16_TO_FP32(x0->d) * y0->d, + GGML_CPU_FP16_TO_FP32(x0->d) * y1->d, + GGML_CPU_FP16_TO_FP32(x1->d) * y0->d, + GGML_CPU_FP16_TO_FP32(x1->d) * y1->d, }; visum = vsubq_s32(visum, vibias); @@ -2858,7 +2859,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi svuint8_t q6h_1, q6h_2, q6h_3, q6h_4; for (int i = 0; i < nb; ++i) { - const float d_all = GGML_FP16_TO_FP32(x[i].d); + const float d_all = GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q6 = x[i].ql; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -3011,7 +3012,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d_all = GGML_FP16_TO_FP32(x[i].d); + const float d_all = GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q6 = x[i].ql; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -3128,7 +3129,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -3199,7 +3200,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; float sumf1 = 0, sumf2 = 0; @@ -3234,7 +3235,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float 
d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; int32_t bsum = 0; @@ -3284,7 +3285,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; const uint8x8_t scales8 = vld1_u8(x[i].scales); @@ -3329,7 +3330,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const uint8_t * GGML_RESTRICT sc = x[i].scales; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3398,7 +3399,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -3458,7 +3459,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0; for (int i = 0; i < nb; i++) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const int8_t * q8 = y[i].qs; const uint8_t * qs = x[i].qs; const uint8_t * qh = x[i].qh; @@ -3521,7 +3522,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = 
x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3557,7 +3558,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3630,7 +3631,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs; @@ -3691,7 +3692,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint8_t * GGML_RESTRICT signs = x[i].signs; @@ -3786,7 +3787,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo } - sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2 + IQ1S_DELTA * sumi3); + sumf += y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2 + IQ1S_DELTA * sumi3); } *s = sumf; @@ -3817,7 +3818,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qs += 4; } - sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); + sumf += GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); } *s = sumf; @@ -3905,7 +3906,7 @@ void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, 
const vo } - sumf += y[i].d * GGML_FP16_TO_FP32(scale.f16) * (vaddvq_s32(sumi1) + IQ1M_DELTA * vaddvq_s32(sumi2)); + sumf += y[i].d * GGML_CPU_FP16_TO_FP32(scale.f16) * (vaddvq_s32(sumi1) + IQ1M_DELTA * vaddvq_s32(sumi2)); } *s = sumf; @@ -3952,7 +3953,7 @@ void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qh += 2; } - sumf += GGML_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2); + sumf += GGML_CPU_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2); } *s = sumf; @@ -4003,13 +4004,13 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v prod_2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q4b.val[2], q8b.val[2]), q4b.val[3], q8b.val[3]); sumf += - GGML_FP16_TO_FP32(x[ib+0].d) * GGML_FP16_TO_FP32(y[ib + 0].d) * vaddvq_s32(prod_1) + - GGML_FP16_TO_FP32(x[ib+1].d) * GGML_FP16_TO_FP32(y[ib + 1].d) * vaddvq_s32(prod_2); + GGML_CPU_FP16_TO_FP32(x[ib+0].d) * GGML_CPU_FP16_TO_FP32(y[ib + 0].d) * vaddvq_s32(prod_1) + + GGML_CPU_FP16_TO_FP32(x[ib+1].d) * GGML_CPU_FP16_TO_FP32(y[ib + 1].d) * vaddvq_s32(prod_2); } #endif for (; ib < nb; ++ib) { - const float d = GGML_FP16_TO_FP32(y[ib].d)*GGML_FP16_TO_FP32(x[ib].d); + const float d = GGML_CPU_FP16_TO_FP32(y[ib].d)*GGML_CPU_FP16_TO_FP32(x[ib].d); int sumi1 = 0, sumi2 = 0; for (int j = 0; j < QK4_NL/2; ++j) { sumi1 += y[ib].qs[j+ 0] * kvalues_iq4nl[x[ib].qs[j] & 0xf]; @@ -4071,7 +4072,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v } - sumf += GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d * (sumi1 + sumi2); + sumf += GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d * (sumi1 + sumi2); } *s = sumf; @@ -4079,7 +4080,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v #else float sumf = 0; for (int ibl = 0; ibl < nb; ++ibl) { - const float d4d8 = GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d; + const float d4d8 = GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d; uint16_t h = x[ibl].scales_h; const 
uint8_t * qs = x[ibl].qs; const int8_t * q8 = y[ibl].qs; diff --git a/ggml/src/ggml-cpu/arch/arm/repack.cpp b/ggml/src/ggml-cpu/arch/arm/repack.cpp index 9337e01b62390..2f8bc9e251735 100644 --- a/ggml/src/ggml-cpu/arch/arm/repack.cpp +++ b/ggml/src/ggml-cpu/arch/arm/repack.cpp @@ -6,6 +6,7 @@ #include "ggml-impl.h" #include "ggml-cpu.h" #include "ggml-cpu-impl.h" +#include "simd-mappings.h" #include "traits.h" #include @@ -51,7 +52,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR const float d = amax / ((1 << 7) - 1); id[row_iter] = d ? 1.0f / d : 0.0f; - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); } for (int j = 0; j < 8; j++) { @@ -102,7 +103,7 @@ void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTR const float d = amax / ((1 << 7) - 1); id[row_iter] = d ? 1.0f / d : 0.0f; - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); } for (int j = 0; j < QK8_0 * 4; j++) { @@ -145,7 +146,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR const float d = amax / ((1 << 7) - 1); id[row_iter] = d ? 1.0f / d : 0.0f; - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); } for (int j = 0; j < 4; j++) { @@ -221,7 +222,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR const float d = amax / ((1 << 7) - 1); id[row_iter] = d ? 1.0f / d : 0.0f; - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); } for (int j = 0; j < QK8_0 * 4; j++) { @@ -256,45 +257,43 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo UNUSED(blocklen); #if ! ((defined(_MSC_VER)) && ! 
defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) - if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { - const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx; - - for (int c = 0; c < nc; c += ncols_interleaved) { - const block_q8_0 * a_ptr = (const block_q8_0 *) vy; - float32x4_t acc = vdupq_n_f32(0); - for (int b = 0; b < nb; b++) { - int8x16_t b0 = vld1q_s8((const int8_t *) b_ptr->qs); - int8x16_t b1 = vld1q_s8((const int8_t *) b_ptr->qs + 16); - int8x16_t b2 = vld1q_s8((const int8_t *) b_ptr->qs + 32); - int8x16_t b3 = vld1q_s8((const int8_t *) b_ptr->qs + 48); - float16x4_t bd = vld1_f16((const __fp16 *) b_ptr->d); - - int8x16_t a0 = vld1q_s8(a_ptr->qs); - int8x16_t a1 = vld1q_s8(a_ptr->qs + qk/2); - float16x4_t ad = vld1_dup_f16((const __fp16 *) &a_ptr->d); - - int32x4_t ret = vdupq_n_s32(0); - - ret = vdotq_laneq_s32(ret, b0 << 4, a0, 0); - ret = vdotq_laneq_s32(ret, b1 << 4, a0, 1); - ret = vdotq_laneq_s32(ret, b2 << 4, a0, 2); - ret = vdotq_laneq_s32(ret, b3 << 4, a0, 3); - - ret = vdotq_laneq_s32(ret, b0 & 0xf0U, a1, 0); - ret = vdotq_laneq_s32(ret, b1 & 0xf0U, a1, 1); - ret = vdotq_laneq_s32(ret, b2 & 0xf0U, a1, 2); - ret = vdotq_laneq_s32(ret, b3 & 0xf0U, a1, 3); - - acc = vfmaq_f32(acc, vcvtq_n_f32_s32(ret, 4), - vmulq_f32(vcvt_f32_f16(ad), vcvt_f32_f16(bd))); - a_ptr++; - b_ptr++; - } - vst1q_f32(s, acc); - s += ncols_interleaved; + const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx; + + for (int c = 0; c < nc; c += ncols_interleaved) { + const block_q8_0 * a_ptr = (const block_q8_0 *) vy; + float32x4_t acc = vdupq_n_f32(0); + for (int b = 0; b < nb; b++) { + int8x16_t b0 = vld1q_s8((const int8_t *) b_ptr->qs); + int8x16_t b1 = vld1q_s8((const int8_t *) b_ptr->qs + 16); + int8x16_t b2 = vld1q_s8((const int8_t *) b_ptr->qs + 32); + int8x16_t b3 = vld1q_s8((const int8_t *) b_ptr->qs + 48); + float16x4_t bd = vld1_f16((const __fp16 *) b_ptr->d); + + int8x16_t a0 = vld1q_s8(a_ptr->qs); + int8x16_t 
a1 = vld1q_s8(a_ptr->qs + qk/2); + float16x4_t ad = vld1_dup_f16((const __fp16 *) &a_ptr->d); + + int32x4_t ret = vdupq_n_s32(0); + + ret = vdotq_laneq_s32(ret, b0 << 4, a0, 0); + ret = vdotq_laneq_s32(ret, b1 << 4, a0, 1); + ret = vdotq_laneq_s32(ret, b2 << 4, a0, 2); + ret = vdotq_laneq_s32(ret, b3 << 4, a0, 3); + + ret = vdotq_laneq_s32(ret, b0 & 0xf0U, a1, 0); + ret = vdotq_laneq_s32(ret, b1 & 0xf0U, a1, 1); + ret = vdotq_laneq_s32(ret, b2 & 0xf0U, a1, 2); + ret = vdotq_laneq_s32(ret, b3 & 0xf0U, a1, 3); + + acc = vfmaq_f32(acc, vcvtq_n_f32_s32(ret, 4), + vmulq_f32(vcvt_f32_f16(ad), vcvt_f32_f16(bd))); + a_ptr++; + b_ptr++; } - return; + vst1q_f32(s, acc); + s += ncols_interleaved; } + return; #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) float sumf[4]; int sumi; @@ -313,7 +312,7 @@ void ggml_gemv_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -341,50 +340,48 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo UNUSED(blocklen); #if ! ((defined(_MSC_VER)) && ! 
defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) - if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { - const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx; - - for (int c = 0; c < nc; c += ncols_interleaved) { - const block_q8_0 * a_ptr = (const block_q8_0 *) vy; - float32x4_t acc = vdupq_n_f32(0); - for (int b = 0; b < nb; b++) { - int8x16_t b0 = vld1q_s8((const int8_t *) b_ptr->qs); - int8x16_t b1 = vld1q_s8((const int8_t *) b_ptr->qs + 16); - int8x16_t b2 = vld1q_s8((const int8_t *) b_ptr->qs + 32); - int8x16_t b3 = vld1q_s8((const int8_t *) b_ptr->qs + 48); - float16x4_t bd = vld1_f16((const __fp16 *) b_ptr->d); - - int8x16_t a0 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs); - int8x16_t a1 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 1); - int8x16_t a2 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 2); - int8x16_t a3 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 3); - float16x4_t ad = vld1_dup_f16((const __fp16 *) &a_ptr->d); - - int32x4_t ret0 = vdupq_n_s32(0); - int32x4_t ret1 = vdupq_n_s32(0); - - ret0 = vdotq_s32(ret0, b0 << 4, a0); - ret1 = vdotq_s32(ret1, b1 << 4, a0); - ret0 = vdotq_s32(ret0, b2 << 4, a1); - ret1 = vdotq_s32(ret1, b3 << 4, a1); - - ret0 = vdotq_s32(ret0, b0 & 0xf0U, a2); - ret1 = vdotq_s32(ret1, b1 & 0xf0U, a2); - ret0 = vdotq_s32(ret0, b2 & 0xf0U, a3); - ret1 = vdotq_s32(ret1, b3 & 0xf0U, a3); - - int32x4_t ret = vpaddq_s32(ret0, ret1); - - acc = vfmaq_f32(acc, vcvtq_n_f32_s32(ret, 4), - vmulq_f32(vcvt_f32_f16(ad), vcvt_f32_f16(bd))); - a_ptr++; - b_ptr++; - } - vst1q_f32(s, acc); - s += ncols_interleaved; + const block_q4_0x4 * b_ptr = (const block_q4_0x4 *) vx; + + for (int c = 0; c < nc; c += ncols_interleaved) { + const block_q8_0 * a_ptr = (const block_q8_0 *) vy; + float32x4_t acc = vdupq_n_f32(0); + for (int b = 0; b < nb; b++) { + int8x16_t b0 = vld1q_s8((const int8_t *) b_ptr->qs); + int8x16_t b1 = vld1q_s8((const int8_t *) 
b_ptr->qs + 16); + int8x16_t b2 = vld1q_s8((const int8_t *) b_ptr->qs + 32); + int8x16_t b3 = vld1q_s8((const int8_t *) b_ptr->qs + 48); + float16x4_t bd = vld1_f16((const __fp16 *) b_ptr->d); + + int8x16_t a0 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs); + int8x16_t a1 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 1); + int8x16_t a2 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 2); + int8x16_t a3 = (int8x16_t) vld1q_dup_s64((const int64_t *) a_ptr->qs + 3); + float16x4_t ad = vld1_dup_f16((const __fp16 *) &a_ptr->d); + + int32x4_t ret0 = vdupq_n_s32(0); + int32x4_t ret1 = vdupq_n_s32(0); + + ret0 = vdotq_s32(ret0, b0 << 4, a0); + ret1 = vdotq_s32(ret1, b1 << 4, a0); + ret0 = vdotq_s32(ret0, b2 << 4, a1); + ret1 = vdotq_s32(ret1, b3 << 4, a1); + + ret0 = vdotq_s32(ret0, b0 & 0xf0U, a2); + ret1 = vdotq_s32(ret1, b1 & 0xf0U, a2); + ret0 = vdotq_s32(ret0, b2 & 0xf0U, a3); + ret1 = vdotq_s32(ret1, b3 & 0xf0U, a3); + + int32x4_t ret = vpaddq_s32(ret0, ret1); + + acc = vfmaq_f32(acc, vcvtq_n_f32_s32(ret, 4), + vmulq_f32(vcvt_f32_f16(ad), vcvt_f32_f16(bd))); + a_ptr++; + b_ptr++; } - return; + vst1q_f32(s, acc); + s += ncols_interleaved; } + return; #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) float sumf[4]; int sumi; @@ -403,7 +400,7 @@ void ggml_gemv_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -432,7 +429,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo #if ! ((defined(_MSC_VER)) && ! 
defined(__clang__)) && defined(__aarch64__) #if defined(__ARM_FEATURE_SVE) - if (ggml_cpu_has_sve() && ggml_cpu_get_sve_cnt() == QK8_0) { + if (ggml_cpu_get_sve_cnt() == QK8_0) { const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -518,7 +515,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -547,54 +544,52 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const UNUSED(blocklen); #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) - if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { - const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl); - const block_q8_0 * a_ptr = (const block_q8_0 *) vy; - float * res_ptr = s; - - for (int x = 0; x < nc / ncols_interleaved; x++) { - const block_iq4_nlx4 * b_ptr = (const block_iq4_nlx4 *) vx + (x * nb); - - float32x4_t sumf = vdupq_n_f32(0); - for (int l = 0; l < nb; l++) { - uint8x16_t b_0 = vld1q_u8(b_ptr[l].qs + 0); - uint8x16_t b_1 = vld1q_u8(b_ptr[l].qs + 16); - uint8x16_t b_2 = vld1q_u8(b_ptr[l].qs + 32); - uint8x16_t b_3 = vld1q_u8(b_ptr[l].qs + 48); - - int8x16_t b_0_hi = vqtbl1q_s8(kvalues, b_0 >> 4); - int8x16_t b_0_lo = vqtbl1q_s8(kvalues, b_0 & 0x0F); - int8x16_t b_1_hi = vqtbl1q_s8(kvalues, b_1 >> 4); - int8x16_t b_1_lo = vqtbl1q_s8(kvalues, b_1 & 0x0F); - int8x16_t b_2_hi = vqtbl1q_s8(kvalues, b_2 >> 4); - int8x16_t b_2_lo = vqtbl1q_s8(kvalues, b_2 & 0x0F); - int8x16_t b_3_hi = vqtbl1q_s8(kvalues, b_3 >> 4); - int8x16_t b_3_lo = vqtbl1q_s8(kvalues, b_3 & 0x0F); - - 
int8x16_t a_0 = vld1q_s8(a_ptr[l].qs + 0); - int8x16_t a_1 = vld1q_s8(a_ptr[l].qs + 16); - - int32x4_t sumi = vdupq_n_s32(0); - sumi = vdotq_laneq_s32(sumi, b_0_lo, a_0, 0); - sumi = vdotq_laneq_s32(sumi, b_0_hi, a_1, 0); - sumi = vdotq_laneq_s32(sumi, b_1_lo, a_0, 1); - sumi = vdotq_laneq_s32(sumi, b_1_hi, a_1, 1); - sumi = vdotq_laneq_s32(sumi, b_2_lo, a_0, 2); - sumi = vdotq_laneq_s32(sumi, b_2_hi, a_1, 2); - sumi = vdotq_laneq_s32(sumi, b_3_lo, a_0, 3); - sumi = vdotq_laneq_s32(sumi, b_3_hi, a_1, 3); - - float32x4_t a_d = vcvt_f32_f16(vld1_dup_f16((const float16_t *)&a_ptr[l].d)); - float32x4_t b_d = vcvt_f32_f16(vld1_f16((const float16_t *)b_ptr[l].d)); - float32x4_t d = a_d * b_d; + const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl); + const block_q8_0 * a_ptr = (const block_q8_0 *) vy; + float * res_ptr = s; - sumf = vmlaq_f32(sumf, d, vcvtq_f32_s32(sumi)); - } + for (int x = 0; x < nc / ncols_interleaved; x++) { + const block_iq4_nlx4 * b_ptr = (const block_iq4_nlx4 *) vx + (x * nb); - vst1q_f32(res_ptr + x * 4, sumf); + float32x4_t sumf = vdupq_n_f32(0); + for (int l = 0; l < nb; l++) { + uint8x16_t b_0 = vld1q_u8(b_ptr[l].qs + 0); + uint8x16_t b_1 = vld1q_u8(b_ptr[l].qs + 16); + uint8x16_t b_2 = vld1q_u8(b_ptr[l].qs + 32); + uint8x16_t b_3 = vld1q_u8(b_ptr[l].qs + 48); + + int8x16_t b_0_hi = vqtbl1q_s8(kvalues, b_0 >> 4); + int8x16_t b_0_lo = vqtbl1q_s8(kvalues, b_0 & 0x0F); + int8x16_t b_1_hi = vqtbl1q_s8(kvalues, b_1 >> 4); + int8x16_t b_1_lo = vqtbl1q_s8(kvalues, b_1 & 0x0F); + int8x16_t b_2_hi = vqtbl1q_s8(kvalues, b_2 >> 4); + int8x16_t b_2_lo = vqtbl1q_s8(kvalues, b_2 & 0x0F); + int8x16_t b_3_hi = vqtbl1q_s8(kvalues, b_3 >> 4); + int8x16_t b_3_lo = vqtbl1q_s8(kvalues, b_3 & 0x0F); + + int8x16_t a_0 = vld1q_s8(a_ptr[l].qs + 0); + int8x16_t a_1 = vld1q_s8(a_ptr[l].qs + 16); + + int32x4_t sumi = vdupq_n_s32(0); + sumi = vdotq_laneq_s32(sumi, b_0_lo, a_0, 0); + sumi = vdotq_laneq_s32(sumi, b_0_hi, a_1, 0); + sumi = vdotq_laneq_s32(sumi, b_1_lo, a_0, 1); 
+ sumi = vdotq_laneq_s32(sumi, b_1_hi, a_1, 1); + sumi = vdotq_laneq_s32(sumi, b_2_lo, a_0, 2); + sumi = vdotq_laneq_s32(sumi, b_2_hi, a_1, 2); + sumi = vdotq_laneq_s32(sumi, b_3_lo, a_0, 3); + sumi = vdotq_laneq_s32(sumi, b_3_hi, a_1, 3); + + float32x4_t a_d = vcvt_f32_f16(vld1_dup_f16((const float16_t *)&a_ptr[l].d)); + float32x4_t b_d = vcvt_f32_f16(vld1_f16((const float16_t *)b_ptr[l].d)); + float32x4_t d = a_d * b_d; + + sumf = vmlaq_f32(sumf, d, vcvtq_f32_s32(sumi)); } - return; + + vst1q_f32(res_ptr + x * 4, sumf); } + return; #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) { float sumf[4]; @@ -614,7 +609,7 @@ void ggml_gemv_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4]; sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])); } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -643,465 +638,463 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo UNUSED(ncols_interleaved); UNUSED(blocklen); -#if ! ((defined(_MSC_VER)) && ! 
defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) - if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { - const void * b_ptr = vx; - const void * a_ptr = vy; - float * res_ptr = s; - size_t res_stride = bs * sizeof(float); - - __asm__ __volatile__( - "mov x10, %x[nr]\n" - "mov x9, #0x88\n" - "cmp x10, #0x10\n" - "mul x9, %x[nb], x9\n" - "blt 4f\n" - "1:" // Row loop - "add x28, %x[b_ptr], #0x8\n" - "mov x27, %x[nc]\n" - "add x26, %x[res_ptr], %x[res_stride], LSL #4\n" - "2:" // Column loop - "add x25, %x[a_ptr], #0x8\n" - "movi v15.16b, #0x0\n" - "movi v19.16b, #0x0\n" - "mov x24, %x[nb]\n" - "add x23, x25, x9\n" - "movi v18.16b, #0x0\n" - "movi v14.16b, #0x0\n" - "add x22, x23, x9\n" - "movi v11.16b, #0x0\n" - "movi v13.16b, #0x0\n" - "add x21, x22, x9\n" - "movi v23.16b, #0x0\n" - "movi v16.16b, #0x0\n" - "movi v25.16b, #0x0\n" - "movi v7.16b, #0x0\n" - "movi v0.16b, #0x0\n" - "movi v4.16b, #0x0\n" - "movi v5.16b, #0x0\n" - "movi v21.16b, #0x0\n" - "movi v8.16b, #0x0\n" - "movi v1.16b, #0x0\n" - "3:" // Block loop - "ldr q3, [x28, #0x0]\n" - "ldr q31, [x25, #0x0]\n" - "movi v28.16b, #0x4\n" - "movi v10.4s, #0x0\n" - "ldr q22, [x28, #0x10]\n" - "ldr q6, [x25, #0x10]\n" - "movi v29.4s, #0x0\n" - "movi v9.4s, #0x0\n" - "ldr q27, [x28, #0x20]\n" - "ldr q30, [x28, #0x30]\n" - "movi v20.4s, #0x0\n" - "movi v24.16b, #0xf0\n" - "ldr d2, [x25, #-0x8]\n" - "ldr d26, [x23, #-0x8]\n" - "sshl v12.16b, v3.16b, v28.16b\n" - "sub x20, x28, #0x8\n" - "ldr d17, [x20, #0x0]\n" - "and v3.16b, v3.16b, v24.16b\n" - "subs x24, x24, #0x1\n" - "add x28, x28, #0x48\n" - ".inst 0x4f9fe18a // sdot v10.4s, v12.16b, v31.4b[0]\n" - ".inst 0x4fbfe19d // sdot v29.4s, v12.16b, v31.4b[1]\n" - ".inst 0x4f9fe989 // sdot v9.4s, v12.16b, v31.4b[2]\n" - ".inst 0x4fbfe994 // sdot v20.4s, v12.16b, v31.4b[3]\n" - "sshl v31.16b, v22.16b, v28.16b\n" - "and v22.16b, v22.16b, v24.16b\n" - "fcvtl v17.4s, v17.4h\n" - "fcvtl v2.4s, v2.4h\n" - "fcvtl v26.4s, v26.4h\n" - ".inst 0x4f86e3ea // sdot 
v10.4s, v31.16b, v6.4b[0]\n" - ".inst 0x4fa6e3fd // sdot v29.4s, v31.16b, v6.4b[1]\n" - ".inst 0x4f86ebe9 // sdot v9.4s, v31.16b, v6.4b[2]\n" - ".inst 0x4fa6ebf4 // sdot v20.4s, v31.16b, v6.4b[3]\n" - "sshl v6.16b, v27.16b, v28.16b\n" - "sshl v28.16b, v30.16b, v28.16b\n" - "and v27.16b, v27.16b, v24.16b\n" - "and v30.16b, v30.16b, v24.16b\n" - "ldr q24, [x25, #0x20]\n" - ".inst 0x4f98e0ca // sdot v10.4s, v6.16b, v24.4b[0]\n" - ".inst 0x4fb8e0dd // sdot v29.4s, v6.16b, v24.4b[1]\n" - ".inst 0x4f98e8c9 // sdot v9.4s, v6.16b, v24.4b[2]\n" - ".inst 0x4fb8e8d4 // sdot v20.4s, v6.16b, v24.4b[3]\n" - "ldr q24, [x25, #0x30]\n" - ".inst 0x4f98e38a // sdot v10.4s, v28.16b, v24.4b[0]\n" - ".inst 0x4fb8e39d // sdot v29.4s, v28.16b, v24.4b[1]\n" - ".inst 0x4f98eb89 // sdot v9.4s, v28.16b, v24.4b[2]\n" - ".inst 0x4fb8eb94 // sdot v20.4s, v28.16b, v24.4b[3]\n" - "ldr q24, [x25, #0x40]\n" - ".inst 0x4f98e06a // sdot v10.4s, v3.16b, v24.4b[0]\n" - ".inst 0x4fb8e07d // sdot v29.4s, v3.16b, v24.4b[1]\n" - ".inst 0x4f98e869 // sdot v9.4s, v3.16b, v24.4b[2]\n" - ".inst 0x4fb8e874 // sdot v20.4s, v3.16b, v24.4b[3]\n" - "ldr q24, [x25, #0x50]\n" - ".inst 0x4f98e2ca // sdot v10.4s, v22.16b, v24.4b[0]\n" - ".inst 0x4fb8e2dd // sdot v29.4s, v22.16b, v24.4b[1]\n" - ".inst 0x4f98eac9 // sdot v9.4s, v22.16b, v24.4b[2]\n" - ".inst 0x4fb8ead4 // sdot v20.4s, v22.16b, v24.4b[3]\n" - "ldr q24, [x25, #0x60]\n" - ".inst 0x4f98e36a // sdot v10.4s, v27.16b, v24.4b[0]\n" - ".inst 0x4fb8e37d // sdot v29.4s, v27.16b, v24.4b[1]\n" - ".inst 0x4f98eb69 // sdot v9.4s, v27.16b, v24.4b[2]\n" - ".inst 0x4fb8eb74 // sdot v20.4s, v27.16b, v24.4b[3]\n" - "ldr q24, [x25, #0x70]\n" - "add x25, x25, #0x88\n" - ".inst 0x4f98e3ca // sdot v10.4s, v30.16b, v24.4b[0]\n" - ".inst 0x4fb8e3dd // sdot v29.4s, v30.16b, v24.4b[1]\n" - ".inst 0x4f98ebc9 // sdot v9.4s, v30.16b, v24.4b[2]\n" - ".inst 0x4fb8ebd4 // sdot v20.4s, v30.16b, v24.4b[3]\n" - "fmul v24.4s, v17.4s, v2.s[0]\n" - "scvtf v10.4s, v10.4s, #0x4\n" - "scvtf 
v29.4s, v29.4s, #0x4\n" - "scvtf v9.4s, v9.4s, #0x4\n" - "scvtf v20.4s, v20.4s, #0x4\n" - "fmla v15.4s, v10.4s, v24.4s\n" - "ldr q24, [x23, #0x0]\n" - "fmul v10.4s, v17.4s, v2.s[1]\n" - "fmla v19.4s, v29.4s, v10.4s\n" - "ldr q10, [x23, #0x10]\n" - "fmul v29.4s, v17.4s, v2.s[2]\n" - "fmul v2.4s, v17.4s, v2.s[3]\n" - "fmla v18.4s, v9.4s, v29.4s\n" - "movi v9.4s, #0x0\n" - "movi v29.4s, #0x0\n" - ".inst 0x4f98e189 // sdot v9.4s, v12.16b, v24.4b[0]\n" - ".inst 0x4fb8e19d // sdot v29.4s, v12.16b, v24.4b[1]\n" - "fmla v14.4s, v20.4s, v2.4s\n" - "movi v20.4s, #0x0\n" - "movi v2.4s, #0x0\n" - ".inst 0x4f98e994 // sdot v20.4s, v12.16b, v24.4b[2]\n" - ".inst 0x4fb8e982 // sdot v2.4s, v12.16b, v24.4b[3]\n" - "ldr q24, [x23, #0x20]\n" - ".inst 0x4f8ae3e9 // sdot v9.4s, v31.16b, v10.4b[0]\n" - ".inst 0x4faae3fd // sdot v29.4s, v31.16b, v10.4b[1]\n" - ".inst 0x4f8aebf4 // sdot v20.4s, v31.16b, v10.4b[2]\n" - ".inst 0x4faaebe2 // sdot v2.4s, v31.16b, v10.4b[3]\n" - "ldr q10, [x23, #0x30]\n" - ".inst 0x4f98e0c9 // sdot v9.4s, v6.16b, v24.4b[0]\n" - ".inst 0x4fb8e0dd // sdot v29.4s, v6.16b, v24.4b[1]\n" - ".inst 0x4f98e8d4 // sdot v20.4s, v6.16b, v24.4b[2]\n" - ".inst 0x4fb8e8c2 // sdot v2.4s, v6.16b, v24.4b[3]\n" - "ldr q24, [x23, #0x40]\n" - ".inst 0x4f8ae389 // sdot v9.4s, v28.16b, v10.4b[0]\n" - ".inst 0x4faae39d // sdot v29.4s, v28.16b, v10.4b[1]\n" - ".inst 0x4f8aeb94 // sdot v20.4s, v28.16b, v10.4b[2]\n" - ".inst 0x4faaeb82 // sdot v2.4s, v28.16b, v10.4b[3]\n" - "ldr q10, [x23, #0x50]\n" - ".inst 0x4f98e069 // sdot v9.4s, v3.16b, v24.4b[0]\n" - ".inst 0x4fb8e07d // sdot v29.4s, v3.16b, v24.4b[1]\n" - ".inst 0x4f98e874 // sdot v20.4s, v3.16b, v24.4b[2]\n" - ".inst 0x4fb8e862 // sdot v2.4s, v3.16b, v24.4b[3]\n" - "ldr q24, [x23, #0x60]\n" - ".inst 0x4f8ae2c9 // sdot v9.4s, v22.16b, v10.4b[0]\n" - ".inst 0x4faae2dd // sdot v29.4s, v22.16b, v10.4b[1]\n" - ".inst 0x4f8aead4 // sdot v20.4s, v22.16b, v10.4b[2]\n" - ".inst 0x4faaeac2 // sdot v2.4s, v22.16b, v10.4b[3]\n" - "ldr q10, 
[x23, #0x70]\n" - "add x23, x23, #0x88\n" - ".inst 0x4f98e369 // sdot v9.4s, v27.16b, v24.4b[0]\n" - ".inst 0x4fb8e37d // sdot v29.4s, v27.16b, v24.4b[1]\n" - ".inst 0x4f98eb74 // sdot v20.4s, v27.16b, v24.4b[2]\n" - ".inst 0x4fb8eb62 // sdot v2.4s, v27.16b, v24.4b[3]\n" - "ldr q24, [x22, #0x0]\n" - ".inst 0x4f8ae3c9 // sdot v9.4s, v30.16b, v10.4b[0]\n" - ".inst 0x4faae3dd // sdot v29.4s, v30.16b, v10.4b[1]\n" - ".inst 0x4f8aebd4 // sdot v20.4s, v30.16b, v10.4b[2]\n" - ".inst 0x4faaebc2 // sdot v2.4s, v30.16b, v10.4b[3]\n" - "fmul v10.4s, v17.4s, v26.s[0]\n" - "scvtf v9.4s, v9.4s, #0x4\n" - "scvtf v29.4s, v29.4s, #0x4\n" - "scvtf v20.4s, v20.4s, #0x4\n" - "scvtf v2.4s, v2.4s, #0x4\n" - "fmla v11.4s, v9.4s, v10.4s\n" - "ldr q9, [x22, #0x10]\n" - "fmul v10.4s, v17.4s, v26.s[1]\n" - "fmla v13.4s, v29.4s, v10.4s\n" - "ldr d29, [x22, #-0x8]\n" - "fmul v10.4s, v17.4s, v26.s[2]\n" - "fmul v26.4s, v17.4s, v26.s[3]\n" - "fcvtl v29.4s, v29.4h\n" - "fmla v23.4s, v20.4s, v10.4s\n" - "movi v20.4s, #0x0\n" - "movi v10.4s, #0x0\n" - "fmla v16.4s, v2.4s, v26.4s\n" - "movi v26.4s, #0x0\n" - "movi v2.4s, #0x0\n" - ".inst 0x4f98e194 // sdot v20.4s, v12.16b, v24.4b[0]\n" - ".inst 0x4fb8e18a // sdot v10.4s, v12.16b, v24.4b[1]\n" - ".inst 0x4f98e99a // sdot v26.4s, v12.16b, v24.4b[2]\n" - ".inst 0x4fb8e982 // sdot v2.4s, v12.16b, v24.4b[3]\n" - "ldr q24, [x22, #0x20]\n" - ".inst 0x4f89e3f4 // sdot v20.4s, v31.16b, v9.4b[0]\n" - ".inst 0x4fa9e3ea // sdot v10.4s, v31.16b, v9.4b[1]\n" - ".inst 0x4f89ebfa // sdot v26.4s, v31.16b, v9.4b[2]\n" - ".inst 0x4fa9ebe2 // sdot v2.4s, v31.16b, v9.4b[3]\n" - "ldr q9, [x22, #0x30]\n" - ".inst 0x4f98e0d4 // sdot v20.4s, v6.16b, v24.4b[0]\n" - ".inst 0x4fb8e0ca // sdot v10.4s, v6.16b, v24.4b[1]\n" - ".inst 0x4f98e8da // sdot v26.4s, v6.16b, v24.4b[2]\n" - ".inst 0x4fb8e8c2 // sdot v2.4s, v6.16b, v24.4b[3]\n" - "ldr q24, [x22, #0x40]\n" - ".inst 0x4f89e394 // sdot v20.4s, v28.16b, v9.4b[0]\n" - ".inst 0x4fa9e38a // sdot v10.4s, v28.16b, v9.4b[1]\n" - 
".inst 0x4f89eb9a // sdot v26.4s, v28.16b, v9.4b[2]\n" - ".inst 0x4fa9eb82 // sdot v2.4s, v28.16b, v9.4b[3]\n" - "ldr q9, [x22, #0x50]\n" - ".inst 0x4f98e074 // sdot v20.4s, v3.16b, v24.4b[0]\n" - ".inst 0x4fb8e06a // sdot v10.4s, v3.16b, v24.4b[1]\n" - ".inst 0x4f98e87a // sdot v26.4s, v3.16b, v24.4b[2]\n" - ".inst 0x4fb8e862 // sdot v2.4s, v3.16b, v24.4b[3]\n" - "ldr q24, [x22, #0x60]\n" - ".inst 0x4f89e2d4 // sdot v20.4s, v22.16b, v9.4b[0]\n" - ".inst 0x4fa9e2ca // sdot v10.4s, v22.16b, v9.4b[1]\n" - ".inst 0x4f89eada // sdot v26.4s, v22.16b, v9.4b[2]\n" - ".inst 0x4fa9eac2 // sdot v2.4s, v22.16b, v9.4b[3]\n" - "ldr q9, [x22, #0x70]\n" - "add x22, x22, #0x88\n" - ".inst 0x4f98e374 // sdot v20.4s, v27.16b, v24.4b[0]\n" - ".inst 0x4fb8e36a // sdot v10.4s, v27.16b, v24.4b[1]\n" - ".inst 0x4f98eb7a // sdot v26.4s, v27.16b, v24.4b[2]\n" - ".inst 0x4fb8eb62 // sdot v2.4s, v27.16b, v24.4b[3]\n" - "ldr q24, [x21, #0x0]\n" - ".inst 0x4f89e3d4 // sdot v20.4s, v30.16b, v9.4b[0]\n" - ".inst 0x4fa9e3ca // sdot v10.4s, v30.16b, v9.4b[1]\n" - ".inst 0x4f89ebda // sdot v26.4s, v30.16b, v9.4b[2]\n" - ".inst 0x4fa9ebc2 // sdot v2.4s, v30.16b, v9.4b[3]\n" - "fmul v9.4s, v17.4s, v29.s[0]\n" - "scvtf v20.4s, v20.4s, #0x4\n" - "scvtf v10.4s, v10.4s, #0x4\n" - "scvtf v26.4s, v26.4s, #0x4\n" - "scvtf v2.4s, v2.4s, #0x4\n" - "fmla v25.4s, v20.4s, v9.4s\n" - "ldr q9, [x21, #0x10]\n" - "fmul v20.4s, v17.4s, v29.s[1]\n" - "fmla v7.4s, v10.4s, v20.4s\n" - "ldr d20, [x21, #-0x8]\n" - "fmul v10.4s, v17.4s, v29.s[2]\n" - "fmul v29.4s, v17.4s, v29.s[3]\n" - "fcvtl v20.4s, v20.4h\n" - "fmla v0.4s, v26.4s, v10.4s\n" - "movi v26.4s, #0x0\n" - "movi v10.4s, #0x0\n" - "fmla v4.4s, v2.4s, v29.4s\n" - "movi v2.4s, #0x0\n" - "movi v29.4s, #0x0\n" - ".inst 0x4f98e19a // sdot v26.4s, v12.16b, v24.4b[0]\n" - ".inst 0x4fb8e18a // sdot v10.4s, v12.16b, v24.4b[1]\n" - ".inst 0x4f98e982 // sdot v2.4s, v12.16b, v24.4b[2]\n" - ".inst 0x4fb8e99d // sdot v29.4s, v12.16b, v24.4b[3]\n" - "ldr q12, [x21, #0x20]\n" - 
"fmul v24.4s, v17.4s, v20.s[0]\n" - ".inst 0x4f89e3fa // sdot v26.4s, v31.16b, v9.4b[0]\n" - ".inst 0x4fa9e3ea // sdot v10.4s, v31.16b, v9.4b[1]\n" - ".inst 0x4f89ebe2 // sdot v2.4s, v31.16b, v9.4b[2]\n" - ".inst 0x4fa9ebfd // sdot v29.4s, v31.16b, v9.4b[3]\n" - "ldr q9, [x21, #0x30]\n" - "fmul v31.4s, v17.4s, v20.s[1]\n" - ".inst 0x4f8ce0da // sdot v26.4s, v6.16b, v12.4b[0]\n" - ".inst 0x4face0ca // sdot v10.4s, v6.16b, v12.4b[1]\n" - ".inst 0x4f8ce8c2 // sdot v2.4s, v6.16b, v12.4b[2]\n" - ".inst 0x4face8dd // sdot v29.4s, v6.16b, v12.4b[3]\n" - "ldr q12, [x21, #0x40]\n" - "fmul v6.4s, v17.4s, v20.s[2]\n" - "fmul v20.4s, v17.4s, v20.s[3]\n" - ".inst 0x4f89e39a // sdot v26.4s, v28.16b, v9.4b[0]\n" - ".inst 0x4fa9e38a // sdot v10.4s, v28.16b, v9.4b[1]\n" - ".inst 0x4f89eb82 // sdot v2.4s, v28.16b, v9.4b[2]\n" - ".inst 0x4fa9eb9d // sdot v29.4s, v28.16b, v9.4b[3]\n" - "ldr q9, [x21, #0x50]\n" - ".inst 0x4f8ce07a // sdot v26.4s, v3.16b, v12.4b[0]\n" - ".inst 0x4face06a // sdot v10.4s, v3.16b, v12.4b[1]\n" - ".inst 0x4f8ce862 // sdot v2.4s, v3.16b, v12.4b[2]\n" - ".inst 0x4face87d // sdot v29.4s, v3.16b, v12.4b[3]\n" - "ldr q12, [x21, #0x60]\n" - ".inst 0x4f89e2da // sdot v26.4s, v22.16b, v9.4b[0]\n" - ".inst 0x4fa9e2ca // sdot v10.4s, v22.16b, v9.4b[1]\n" - ".inst 0x4f89eac2 // sdot v2.4s, v22.16b, v9.4b[2]\n" - ".inst 0x4fa9eadd // sdot v29.4s, v22.16b, v9.4b[3]\n" - "ldr q17, [x21, #0x70]\n" - "add x21, x21, #0x88\n" - ".inst 0x4f8ce37a // sdot v26.4s, v27.16b, v12.4b[0]\n" - ".inst 0x4face36a // sdot v10.4s, v27.16b, v12.4b[1]\n" - ".inst 0x4f8ceb62 // sdot v2.4s, v27.16b, v12.4b[2]\n" - ".inst 0x4faceb7d // sdot v29.4s, v27.16b, v12.4b[3]\n" - ".inst 0x4f91e3da // sdot v26.4s, v30.16b, v17.4b[0]\n" - ".inst 0x4fb1e3ca // sdot v10.4s, v30.16b, v17.4b[1]\n" - ".inst 0x4f91ebc2 // sdot v2.4s, v30.16b, v17.4b[2]\n" - ".inst 0x4fb1ebdd // sdot v29.4s, v30.16b, v17.4b[3]\n" - "scvtf v26.4s, v26.4s, #0x4\n" - "scvtf v10.4s, v10.4s, #0x4\n" - "fmla v5.4s, v26.4s, 
v24.4s\n" - "scvtf v2.4s, v2.4s, #0x4\n" - "scvtf v29.4s, v29.4s, #0x4\n" - "fmla v21.4s, v10.4s, v31.4s\n" - "fmla v8.4s, v2.4s, v6.4s\n" - "fmla v1.4s, v29.4s, v20.4s\n" - "bgt 3b\n" - "mov x20, %x[res_ptr]\n" - "subs x27, x27, #0x4\n" - "add %x[res_ptr], %x[res_ptr], #0x10\n" - "str q15, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q19, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q18, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q14, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q11, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q13, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q23, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q16, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q25, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q7, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q0, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q4, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q5, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q21, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q8, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q1, [x20, #0x0]\n" - "bne 2b\n" - "mov x20, #0x4\n" - "sub x10, x10, #0x10\n" - "cmp x10, #0x10\n" - "mov %x[res_ptr], x26\n" - "madd %x[a_ptr], x20, x9, %x[a_ptr]\n" - "bge 1b\n" - "4:" // Row loop skip - "cbz x10, 9f\n" - "5:" // Row tail: Row loop - "add x24, %x[b_ptr], #0x8\n" - "mov x23, %x[nc]\n" - "add x22, %x[res_ptr], %x[res_stride], LSL #2\n" - "6:" // Row tail: Column loop - "movi v15.16b, #0x0\n" - "movi v19.16b, #0x0\n" - "add x25, %x[a_ptr], #0x8\n" - "mov x21, %x[nb]\n" - "movi v18.16b, #0x0\n" - "movi v14.16b, #0x0\n" - "7:" // Row tail: Block loop - "ldr q7, [x24, #0x0]\n" - "ldr q5, [x25, #0x0]\n" - "movi v9.16b, #0x4\n" - "movi v4.4s, #0x0\n" - "ldr q3, [x24, #0x10]\n" - "ldr q2, [x25, #0x10]\n" - "movi v1.4s, #0x0\n" - "movi v0.4s, #0x0\n" - "ldr q13, [x24, 
#0x20]\n" - "ldr q31, [x25, #0x20]\n" - "movi v30.4s, #0x0\n" - "movi v29.16b, #0xf0\n" - "ldr q28, [x24, #0x30]\n" - "ldr q27, [x25, #0x30]\n" - "sshl v20.16b, v7.16b, v9.16b\n" - "sub x20, x24, #0x8\n" - "ldr q26, [x25, #0x40]\n" - "ldr q25, [x25, #0x50]\n" - "sshl v17.16b, v3.16b, v9.16b\n" - "and v7.16b, v7.16b, v29.16b\n" - "ldr q24, [x25, #0x60]\n" - "ldr q16, [x25, #0x70]\n" - "sshl v22.16b, v13.16b, v9.16b\n" - "and v3.16b, v3.16b, v29.16b\n" - "ldr d21, [x20, #0x0]\n" - "ldr d12, [x25, #-0x8]\n" - ".inst 0x4f85e284 // sdot v4.4s, v20.16b, v5.4b[0]\n" - ".inst 0x4fa5e281 // sdot v1.4s, v20.16b, v5.4b[1]\n" - ".inst 0x4f85ea80 // sdot v0.4s, v20.16b, v5.4b[2]\n" - ".inst 0x4fa5ea9e // sdot v30.4s, v20.16b, v5.4b[3]\n" - "sshl v9.16b, v28.16b, v9.16b\n" - "subs x21, x21, #0x1\n" - "and v13.16b, v13.16b, v29.16b\n" - "and v28.16b, v28.16b, v29.16b\n" - "add x25, x25, #0x88\n" - "add x24, x24, #0x48\n" - "fcvtl v21.4s, v21.4h\n" - "fcvtl v12.4s, v12.4h\n" - ".inst 0x4f82e224 // sdot v4.4s, v17.16b, v2.4b[0]\n" - ".inst 0x4fa2e221 // sdot v1.4s, v17.16b, v2.4b[1]\n" - ".inst 0x4f82ea20 // sdot v0.4s, v17.16b, v2.4b[2]\n" - ".inst 0x4fa2ea3e // sdot v30.4s, v17.16b, v2.4b[3]\n" - "fmul v11.4s, v21.4s, v12.s[0]\n" - "fmul v23.4s, v21.4s, v12.s[1]\n" - "fmul v17.4s, v21.4s, v12.s[2]\n" - ".inst 0x4f9fe2c4 // sdot v4.4s, v22.16b, v31.4b[0]\n" - "fmul v6.4s, v21.4s, v12.s[3]\n" - ".inst 0x4fbfe2c1 // sdot v1.4s, v22.16b, v31.4b[1]\n" - ".inst 0x4f9feac0 // sdot v0.4s, v22.16b, v31.4b[2]\n" - ".inst 0x4fbfeade // sdot v30.4s, v22.16b, v31.4b[3]\n" - ".inst 0x4f9be124 // sdot v4.4s, v9.16b, v27.4b[0]\n" - ".inst 0x4fbbe121 // sdot v1.4s, v9.16b, v27.4b[1]\n" - ".inst 0x4f9be920 // sdot v0.4s, v9.16b, v27.4b[2]\n" - ".inst 0x4fbbe93e // sdot v30.4s, v9.16b, v27.4b[3]\n" - ".inst 0x4f9ae0e4 // sdot v4.4s, v7.16b, v26.4b[0]\n" - ".inst 0x4fbae0e1 // sdot v1.4s, v7.16b, v26.4b[1]\n" - ".inst 0x4f9ae8e0 // sdot v0.4s, v7.16b, v26.4b[2]\n" - ".inst 0x4fbae8fe // sdot v30.4s, 
v7.16b, v26.4b[3]\n" - ".inst 0x4f99e064 // sdot v4.4s, v3.16b, v25.4b[0]\n" - ".inst 0x4fb9e061 // sdot v1.4s, v3.16b, v25.4b[1]\n" - ".inst 0x4f99e860 // sdot v0.4s, v3.16b, v25.4b[2]\n" - ".inst 0x4fb9e87e // sdot v30.4s, v3.16b, v25.4b[3]\n" - ".inst 0x4f98e1a4 // sdot v4.4s, v13.16b, v24.4b[0]\n" - ".inst 0x4fb8e1a1 // sdot v1.4s, v13.16b, v24.4b[1]\n" - ".inst 0x4f98e9a0 // sdot v0.4s, v13.16b, v24.4b[2]\n" - ".inst 0x4fb8e9be // sdot v30.4s, v13.16b, v24.4b[3]\n" - ".inst 0x4f90e384 // sdot v4.4s, v28.16b, v16.4b[0]\n" - ".inst 0x4fb0e381 // sdot v1.4s, v28.16b, v16.4b[1]\n" - ".inst 0x4f90eb80 // sdot v0.4s, v28.16b, v16.4b[2]\n" - ".inst 0x4fb0eb9e // sdot v30.4s, v28.16b, v16.4b[3]\n" - "scvtf v4.4s, v4.4s, #0x4\n" - "scvtf v1.4s, v1.4s, #0x4\n" - "scvtf v0.4s, v0.4s, #0x4\n" - "fmla v15.4s, v4.4s, v11.4s\n" - "scvtf v30.4s, v30.4s, #0x4\n" - "fmla v19.4s, v1.4s, v23.4s\n" - "fmla v18.4s, v0.4s, v17.4s\n" - "fmla v14.4s, v30.4s, v6.4s\n" - "bgt 7b\n" - "mov x20, %x[res_ptr]\n" - "cmp x10, #0x1\n" - "str q15, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "ble 8f\n" - "cmp x10, #0x2\n" - "str q19, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "ble 8f\n" - "cmp x10, #0x3\n" - "str q18, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "ble 8f\n" - "str q14, [x20, #0x0]\n" - "8:" // Row tail: Accumulator store skip - "subs x23, x23, #0x4\n" - "add %x[res_ptr], %x[res_ptr], #0x10\n" - "bne 6b\n" - "subs x10, x10, #0x4\n" - "add %x[a_ptr], %x[a_ptr], x9\n" - "mov %x[res_ptr], x22\n" - "bgt 5b\n" - "9:" // Row tail: Row loop skip - : [a_ptr] "+&r" (a_ptr), [res_ptr] "+&r" (res_ptr) - : [b_ptr] "r" (b_ptr), [nr] "r" (nr), [nb] "r" (nb), [res_stride] "r" (res_stride), [nc] "r" (nc) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "x9", "x10", "x20", "x21", "x22", 
"x23", "x24", "x25", "x26", "x27", "x28" - ); - return; - } +#if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) + const void * b_ptr = vx; + const void * a_ptr = vy; + float * res_ptr = s; + size_t res_stride = bs * sizeof(float); + + __asm__ __volatile__( + "mov x10, %x[nr]\n" + "mov x9, #0x88\n" + "cmp x10, #0x10\n" + "mul x9, %x[nb], x9\n" + "blt 4f\n" + "1:" // Row loop + "add x28, %x[b_ptr], #0x8\n" + "mov x27, %x[nc]\n" + "add x26, %x[res_ptr], %x[res_stride], LSL #4\n" + "2:" // Column loop + "add x25, %x[a_ptr], #0x8\n" + "movi v15.16b, #0x0\n" + "movi v19.16b, #0x0\n" + "mov x24, %x[nb]\n" + "add x23, x25, x9\n" + "movi v18.16b, #0x0\n" + "movi v14.16b, #0x0\n" + "add x22, x23, x9\n" + "movi v11.16b, #0x0\n" + "movi v13.16b, #0x0\n" + "add x21, x22, x9\n" + "movi v23.16b, #0x0\n" + "movi v16.16b, #0x0\n" + "movi v25.16b, #0x0\n" + "movi v7.16b, #0x0\n" + "movi v0.16b, #0x0\n" + "movi v4.16b, #0x0\n" + "movi v5.16b, #0x0\n" + "movi v21.16b, #0x0\n" + "movi v8.16b, #0x0\n" + "movi v1.16b, #0x0\n" + "3:" // Block loop + "ldr q3, [x28, #0x0]\n" + "ldr q31, [x25, #0x0]\n" + "movi v28.16b, #0x4\n" + "movi v10.4s, #0x0\n" + "ldr q22, [x28, #0x10]\n" + "ldr q6, [x25, #0x10]\n" + "movi v29.4s, #0x0\n" + "movi v9.4s, #0x0\n" + "ldr q27, [x28, #0x20]\n" + "ldr q30, [x28, #0x30]\n" + "movi v20.4s, #0x0\n" + "movi v24.16b, #0xf0\n" + "ldr d2, [x25, #-0x8]\n" + "ldr d26, [x23, #-0x8]\n" + "sshl v12.16b, v3.16b, v28.16b\n" + "sub x20, x28, #0x8\n" + "ldr d17, [x20, #0x0]\n" + "and v3.16b, v3.16b, v24.16b\n" + "subs x24, x24, #0x1\n" + "add x28, x28, #0x48\n" + ".inst 0x4f9fe18a // sdot v10.4s, v12.16b, v31.4b[0]\n" + ".inst 0x4fbfe19d // sdot v29.4s, v12.16b, v31.4b[1]\n" + ".inst 0x4f9fe989 // sdot v9.4s, v12.16b, v31.4b[2]\n" + ".inst 0x4fbfe994 // sdot v20.4s, v12.16b, v31.4b[3]\n" + "sshl v31.16b, v22.16b, v28.16b\n" + "and v22.16b, v22.16b, v24.16b\n" + "fcvtl v17.4s, v17.4h\n" + 
"fcvtl v2.4s, v2.4h\n" + "fcvtl v26.4s, v26.4h\n" + ".inst 0x4f86e3ea // sdot v10.4s, v31.16b, v6.4b[0]\n" + ".inst 0x4fa6e3fd // sdot v29.4s, v31.16b, v6.4b[1]\n" + ".inst 0x4f86ebe9 // sdot v9.4s, v31.16b, v6.4b[2]\n" + ".inst 0x4fa6ebf4 // sdot v20.4s, v31.16b, v6.4b[3]\n" + "sshl v6.16b, v27.16b, v28.16b\n" + "sshl v28.16b, v30.16b, v28.16b\n" + "and v27.16b, v27.16b, v24.16b\n" + "and v30.16b, v30.16b, v24.16b\n" + "ldr q24, [x25, #0x20]\n" + ".inst 0x4f98e0ca // sdot v10.4s, v6.16b, v24.4b[0]\n" + ".inst 0x4fb8e0dd // sdot v29.4s, v6.16b, v24.4b[1]\n" + ".inst 0x4f98e8c9 // sdot v9.4s, v6.16b, v24.4b[2]\n" + ".inst 0x4fb8e8d4 // sdot v20.4s, v6.16b, v24.4b[3]\n" + "ldr q24, [x25, #0x30]\n" + ".inst 0x4f98e38a // sdot v10.4s, v28.16b, v24.4b[0]\n" + ".inst 0x4fb8e39d // sdot v29.4s, v28.16b, v24.4b[1]\n" + ".inst 0x4f98eb89 // sdot v9.4s, v28.16b, v24.4b[2]\n" + ".inst 0x4fb8eb94 // sdot v20.4s, v28.16b, v24.4b[3]\n" + "ldr q24, [x25, #0x40]\n" + ".inst 0x4f98e06a // sdot v10.4s, v3.16b, v24.4b[0]\n" + ".inst 0x4fb8e07d // sdot v29.4s, v3.16b, v24.4b[1]\n" + ".inst 0x4f98e869 // sdot v9.4s, v3.16b, v24.4b[2]\n" + ".inst 0x4fb8e874 // sdot v20.4s, v3.16b, v24.4b[3]\n" + "ldr q24, [x25, #0x50]\n" + ".inst 0x4f98e2ca // sdot v10.4s, v22.16b, v24.4b[0]\n" + ".inst 0x4fb8e2dd // sdot v29.4s, v22.16b, v24.4b[1]\n" + ".inst 0x4f98eac9 // sdot v9.4s, v22.16b, v24.4b[2]\n" + ".inst 0x4fb8ead4 // sdot v20.4s, v22.16b, v24.4b[3]\n" + "ldr q24, [x25, #0x60]\n" + ".inst 0x4f98e36a // sdot v10.4s, v27.16b, v24.4b[0]\n" + ".inst 0x4fb8e37d // sdot v29.4s, v27.16b, v24.4b[1]\n" + ".inst 0x4f98eb69 // sdot v9.4s, v27.16b, v24.4b[2]\n" + ".inst 0x4fb8eb74 // sdot v20.4s, v27.16b, v24.4b[3]\n" + "ldr q24, [x25, #0x70]\n" + "add x25, x25, #0x88\n" + ".inst 0x4f98e3ca // sdot v10.4s, v30.16b, v24.4b[0]\n" + ".inst 0x4fb8e3dd // sdot v29.4s, v30.16b, v24.4b[1]\n" + ".inst 0x4f98ebc9 // sdot v9.4s, v30.16b, v24.4b[2]\n" + ".inst 0x4fb8ebd4 // sdot v20.4s, v30.16b, v24.4b[3]\n" + 
"fmul v24.4s, v17.4s, v2.s[0]\n" + "scvtf v10.4s, v10.4s, #0x4\n" + "scvtf v29.4s, v29.4s, #0x4\n" + "scvtf v9.4s, v9.4s, #0x4\n" + "scvtf v20.4s, v20.4s, #0x4\n" + "fmla v15.4s, v10.4s, v24.4s\n" + "ldr q24, [x23, #0x0]\n" + "fmul v10.4s, v17.4s, v2.s[1]\n" + "fmla v19.4s, v29.4s, v10.4s\n" + "ldr q10, [x23, #0x10]\n" + "fmul v29.4s, v17.4s, v2.s[2]\n" + "fmul v2.4s, v17.4s, v2.s[3]\n" + "fmla v18.4s, v9.4s, v29.4s\n" + "movi v9.4s, #0x0\n" + "movi v29.4s, #0x0\n" + ".inst 0x4f98e189 // sdot v9.4s, v12.16b, v24.4b[0]\n" + ".inst 0x4fb8e19d // sdot v29.4s, v12.16b, v24.4b[1]\n" + "fmla v14.4s, v20.4s, v2.4s\n" + "movi v20.4s, #0x0\n" + "movi v2.4s, #0x0\n" + ".inst 0x4f98e994 // sdot v20.4s, v12.16b, v24.4b[2]\n" + ".inst 0x4fb8e982 // sdot v2.4s, v12.16b, v24.4b[3]\n" + "ldr q24, [x23, #0x20]\n" + ".inst 0x4f8ae3e9 // sdot v9.4s, v31.16b, v10.4b[0]\n" + ".inst 0x4faae3fd // sdot v29.4s, v31.16b, v10.4b[1]\n" + ".inst 0x4f8aebf4 // sdot v20.4s, v31.16b, v10.4b[2]\n" + ".inst 0x4faaebe2 // sdot v2.4s, v31.16b, v10.4b[3]\n" + "ldr q10, [x23, #0x30]\n" + ".inst 0x4f98e0c9 // sdot v9.4s, v6.16b, v24.4b[0]\n" + ".inst 0x4fb8e0dd // sdot v29.4s, v6.16b, v24.4b[1]\n" + ".inst 0x4f98e8d4 // sdot v20.4s, v6.16b, v24.4b[2]\n" + ".inst 0x4fb8e8c2 // sdot v2.4s, v6.16b, v24.4b[3]\n" + "ldr q24, [x23, #0x40]\n" + ".inst 0x4f8ae389 // sdot v9.4s, v28.16b, v10.4b[0]\n" + ".inst 0x4faae39d // sdot v29.4s, v28.16b, v10.4b[1]\n" + ".inst 0x4f8aeb94 // sdot v20.4s, v28.16b, v10.4b[2]\n" + ".inst 0x4faaeb82 // sdot v2.4s, v28.16b, v10.4b[3]\n" + "ldr q10, [x23, #0x50]\n" + ".inst 0x4f98e069 // sdot v9.4s, v3.16b, v24.4b[0]\n" + ".inst 0x4fb8e07d // sdot v29.4s, v3.16b, v24.4b[1]\n" + ".inst 0x4f98e874 // sdot v20.4s, v3.16b, v24.4b[2]\n" + ".inst 0x4fb8e862 // sdot v2.4s, v3.16b, v24.4b[3]\n" + "ldr q24, [x23, #0x60]\n" + ".inst 0x4f8ae2c9 // sdot v9.4s, v22.16b, v10.4b[0]\n" + ".inst 0x4faae2dd // sdot v29.4s, v22.16b, v10.4b[1]\n" + ".inst 0x4f8aead4 // sdot v20.4s, v22.16b, 
v10.4b[2]\n" + ".inst 0x4faaeac2 // sdot v2.4s, v22.16b, v10.4b[3]\n" + "ldr q10, [x23, #0x70]\n" + "add x23, x23, #0x88\n" + ".inst 0x4f98e369 // sdot v9.4s, v27.16b, v24.4b[0]\n" + ".inst 0x4fb8e37d // sdot v29.4s, v27.16b, v24.4b[1]\n" + ".inst 0x4f98eb74 // sdot v20.4s, v27.16b, v24.4b[2]\n" + ".inst 0x4fb8eb62 // sdot v2.4s, v27.16b, v24.4b[3]\n" + "ldr q24, [x22, #0x0]\n" + ".inst 0x4f8ae3c9 // sdot v9.4s, v30.16b, v10.4b[0]\n" + ".inst 0x4faae3dd // sdot v29.4s, v30.16b, v10.4b[1]\n" + ".inst 0x4f8aebd4 // sdot v20.4s, v30.16b, v10.4b[2]\n" + ".inst 0x4faaebc2 // sdot v2.4s, v30.16b, v10.4b[3]\n" + "fmul v10.4s, v17.4s, v26.s[0]\n" + "scvtf v9.4s, v9.4s, #0x4\n" + "scvtf v29.4s, v29.4s, #0x4\n" + "scvtf v20.4s, v20.4s, #0x4\n" + "scvtf v2.4s, v2.4s, #0x4\n" + "fmla v11.4s, v9.4s, v10.4s\n" + "ldr q9, [x22, #0x10]\n" + "fmul v10.4s, v17.4s, v26.s[1]\n" + "fmla v13.4s, v29.4s, v10.4s\n" + "ldr d29, [x22, #-0x8]\n" + "fmul v10.4s, v17.4s, v26.s[2]\n" + "fmul v26.4s, v17.4s, v26.s[3]\n" + "fcvtl v29.4s, v29.4h\n" + "fmla v23.4s, v20.4s, v10.4s\n" + "movi v20.4s, #0x0\n" + "movi v10.4s, #0x0\n" + "fmla v16.4s, v2.4s, v26.4s\n" + "movi v26.4s, #0x0\n" + "movi v2.4s, #0x0\n" + ".inst 0x4f98e194 // sdot v20.4s, v12.16b, v24.4b[0]\n" + ".inst 0x4fb8e18a // sdot v10.4s, v12.16b, v24.4b[1]\n" + ".inst 0x4f98e99a // sdot v26.4s, v12.16b, v24.4b[2]\n" + ".inst 0x4fb8e982 // sdot v2.4s, v12.16b, v24.4b[3]\n" + "ldr q24, [x22, #0x20]\n" + ".inst 0x4f89e3f4 // sdot v20.4s, v31.16b, v9.4b[0]\n" + ".inst 0x4fa9e3ea // sdot v10.4s, v31.16b, v9.4b[1]\n" + ".inst 0x4f89ebfa // sdot v26.4s, v31.16b, v9.4b[2]\n" + ".inst 0x4fa9ebe2 // sdot v2.4s, v31.16b, v9.4b[3]\n" + "ldr q9, [x22, #0x30]\n" + ".inst 0x4f98e0d4 // sdot v20.4s, v6.16b, v24.4b[0]\n" + ".inst 0x4fb8e0ca // sdot v10.4s, v6.16b, v24.4b[1]\n" + ".inst 0x4f98e8da // sdot v26.4s, v6.16b, v24.4b[2]\n" + ".inst 0x4fb8e8c2 // sdot v2.4s, v6.16b, v24.4b[3]\n" + "ldr q24, [x22, #0x40]\n" + ".inst 0x4f89e394 // sdot v20.4s, 
v28.16b, v9.4b[0]\n" + ".inst 0x4fa9e38a // sdot v10.4s, v28.16b, v9.4b[1]\n" + ".inst 0x4f89eb9a // sdot v26.4s, v28.16b, v9.4b[2]\n" + ".inst 0x4fa9eb82 // sdot v2.4s, v28.16b, v9.4b[3]\n" + "ldr q9, [x22, #0x50]\n" + ".inst 0x4f98e074 // sdot v20.4s, v3.16b, v24.4b[0]\n" + ".inst 0x4fb8e06a // sdot v10.4s, v3.16b, v24.4b[1]\n" + ".inst 0x4f98e87a // sdot v26.4s, v3.16b, v24.4b[2]\n" + ".inst 0x4fb8e862 // sdot v2.4s, v3.16b, v24.4b[3]\n" + "ldr q24, [x22, #0x60]\n" + ".inst 0x4f89e2d4 // sdot v20.4s, v22.16b, v9.4b[0]\n" + ".inst 0x4fa9e2ca // sdot v10.4s, v22.16b, v9.4b[1]\n" + ".inst 0x4f89eada // sdot v26.4s, v22.16b, v9.4b[2]\n" + ".inst 0x4fa9eac2 // sdot v2.4s, v22.16b, v9.4b[3]\n" + "ldr q9, [x22, #0x70]\n" + "add x22, x22, #0x88\n" + ".inst 0x4f98e374 // sdot v20.4s, v27.16b, v24.4b[0]\n" + ".inst 0x4fb8e36a // sdot v10.4s, v27.16b, v24.4b[1]\n" + ".inst 0x4f98eb7a // sdot v26.4s, v27.16b, v24.4b[2]\n" + ".inst 0x4fb8eb62 // sdot v2.4s, v27.16b, v24.4b[3]\n" + "ldr q24, [x21, #0x0]\n" + ".inst 0x4f89e3d4 // sdot v20.4s, v30.16b, v9.4b[0]\n" + ".inst 0x4fa9e3ca // sdot v10.4s, v30.16b, v9.4b[1]\n" + ".inst 0x4f89ebda // sdot v26.4s, v30.16b, v9.4b[2]\n" + ".inst 0x4fa9ebc2 // sdot v2.4s, v30.16b, v9.4b[3]\n" + "fmul v9.4s, v17.4s, v29.s[0]\n" + "scvtf v20.4s, v20.4s, #0x4\n" + "scvtf v10.4s, v10.4s, #0x4\n" + "scvtf v26.4s, v26.4s, #0x4\n" + "scvtf v2.4s, v2.4s, #0x4\n" + "fmla v25.4s, v20.4s, v9.4s\n" + "ldr q9, [x21, #0x10]\n" + "fmul v20.4s, v17.4s, v29.s[1]\n" + "fmla v7.4s, v10.4s, v20.4s\n" + "ldr d20, [x21, #-0x8]\n" + "fmul v10.4s, v17.4s, v29.s[2]\n" + "fmul v29.4s, v17.4s, v29.s[3]\n" + "fcvtl v20.4s, v20.4h\n" + "fmla v0.4s, v26.4s, v10.4s\n" + "movi v26.4s, #0x0\n" + "movi v10.4s, #0x0\n" + "fmla v4.4s, v2.4s, v29.4s\n" + "movi v2.4s, #0x0\n" + "movi v29.4s, #0x0\n" + ".inst 0x4f98e19a // sdot v26.4s, v12.16b, v24.4b[0]\n" + ".inst 0x4fb8e18a // sdot v10.4s, v12.16b, v24.4b[1]\n" + ".inst 0x4f98e982 // sdot v2.4s, v12.16b, v24.4b[2]\n" + 
".inst 0x4fb8e99d // sdot v29.4s, v12.16b, v24.4b[3]\n" + "ldr q12, [x21, #0x20]\n" + "fmul v24.4s, v17.4s, v20.s[0]\n" + ".inst 0x4f89e3fa // sdot v26.4s, v31.16b, v9.4b[0]\n" + ".inst 0x4fa9e3ea // sdot v10.4s, v31.16b, v9.4b[1]\n" + ".inst 0x4f89ebe2 // sdot v2.4s, v31.16b, v9.4b[2]\n" + ".inst 0x4fa9ebfd // sdot v29.4s, v31.16b, v9.4b[3]\n" + "ldr q9, [x21, #0x30]\n" + "fmul v31.4s, v17.4s, v20.s[1]\n" + ".inst 0x4f8ce0da // sdot v26.4s, v6.16b, v12.4b[0]\n" + ".inst 0x4face0ca // sdot v10.4s, v6.16b, v12.4b[1]\n" + ".inst 0x4f8ce8c2 // sdot v2.4s, v6.16b, v12.4b[2]\n" + ".inst 0x4face8dd // sdot v29.4s, v6.16b, v12.4b[3]\n" + "ldr q12, [x21, #0x40]\n" + "fmul v6.4s, v17.4s, v20.s[2]\n" + "fmul v20.4s, v17.4s, v20.s[3]\n" + ".inst 0x4f89e39a // sdot v26.4s, v28.16b, v9.4b[0]\n" + ".inst 0x4fa9e38a // sdot v10.4s, v28.16b, v9.4b[1]\n" + ".inst 0x4f89eb82 // sdot v2.4s, v28.16b, v9.4b[2]\n" + ".inst 0x4fa9eb9d // sdot v29.4s, v28.16b, v9.4b[3]\n" + "ldr q9, [x21, #0x50]\n" + ".inst 0x4f8ce07a // sdot v26.4s, v3.16b, v12.4b[0]\n" + ".inst 0x4face06a // sdot v10.4s, v3.16b, v12.4b[1]\n" + ".inst 0x4f8ce862 // sdot v2.4s, v3.16b, v12.4b[2]\n" + ".inst 0x4face87d // sdot v29.4s, v3.16b, v12.4b[3]\n" + "ldr q12, [x21, #0x60]\n" + ".inst 0x4f89e2da // sdot v26.4s, v22.16b, v9.4b[0]\n" + ".inst 0x4fa9e2ca // sdot v10.4s, v22.16b, v9.4b[1]\n" + ".inst 0x4f89eac2 // sdot v2.4s, v22.16b, v9.4b[2]\n" + ".inst 0x4fa9eadd // sdot v29.4s, v22.16b, v9.4b[3]\n" + "ldr q17, [x21, #0x70]\n" + "add x21, x21, #0x88\n" + ".inst 0x4f8ce37a // sdot v26.4s, v27.16b, v12.4b[0]\n" + ".inst 0x4face36a // sdot v10.4s, v27.16b, v12.4b[1]\n" + ".inst 0x4f8ceb62 // sdot v2.4s, v27.16b, v12.4b[2]\n" + ".inst 0x4faceb7d // sdot v29.4s, v27.16b, v12.4b[3]\n" + ".inst 0x4f91e3da // sdot v26.4s, v30.16b, v17.4b[0]\n" + ".inst 0x4fb1e3ca // sdot v10.4s, v30.16b, v17.4b[1]\n" + ".inst 0x4f91ebc2 // sdot v2.4s, v30.16b, v17.4b[2]\n" + ".inst 0x4fb1ebdd // sdot v29.4s, v30.16b, v17.4b[3]\n" + "scvtf 
v26.4s, v26.4s, #0x4\n" + "scvtf v10.4s, v10.4s, #0x4\n" + "fmla v5.4s, v26.4s, v24.4s\n" + "scvtf v2.4s, v2.4s, #0x4\n" + "scvtf v29.4s, v29.4s, #0x4\n" + "fmla v21.4s, v10.4s, v31.4s\n" + "fmla v8.4s, v2.4s, v6.4s\n" + "fmla v1.4s, v29.4s, v20.4s\n" + "bgt 3b\n" + "mov x20, %x[res_ptr]\n" + "subs x27, x27, #0x4\n" + "add %x[res_ptr], %x[res_ptr], #0x10\n" + "str q15, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q19, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q18, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q14, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q11, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q13, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q23, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q16, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q25, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q7, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q0, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q4, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q5, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q21, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q8, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q1, [x20, #0x0]\n" + "bne 2b\n" + "mov x20, #0x4\n" + "sub x10, x10, #0x10\n" + "cmp x10, #0x10\n" + "mov %x[res_ptr], x26\n" + "madd %x[a_ptr], x20, x9, %x[a_ptr]\n" + "bge 1b\n" + "4:" // Row loop skip + "cbz x10, 9f\n" + "5:" // Row tail: Row loop + "add x24, %x[b_ptr], #0x8\n" + "mov x23, %x[nc]\n" + "add x22, %x[res_ptr], %x[res_stride], LSL #2\n" + "6:" // Row tail: Column loop + "movi v15.16b, #0x0\n" + "movi v19.16b, #0x0\n" + "add x25, %x[a_ptr], #0x8\n" + "mov x21, %x[nb]\n" + "movi v18.16b, #0x0\n" + "movi v14.16b, #0x0\n" + "7:" // Row tail: Block loop + "ldr q7, [x24, #0x0]\n" + "ldr q5, [x25, #0x0]\n" + "movi v9.16b, #0x4\n" + "movi v4.4s, #0x0\n" + "ldr q3, [x24, #0x10]\n" + "ldr q2, 
[x25, #0x10]\n" + "movi v1.4s, #0x0\n" + "movi v0.4s, #0x0\n" + "ldr q13, [x24, #0x20]\n" + "ldr q31, [x25, #0x20]\n" + "movi v30.4s, #0x0\n" + "movi v29.16b, #0xf0\n" + "ldr q28, [x24, #0x30]\n" + "ldr q27, [x25, #0x30]\n" + "sshl v20.16b, v7.16b, v9.16b\n" + "sub x20, x24, #0x8\n" + "ldr q26, [x25, #0x40]\n" + "ldr q25, [x25, #0x50]\n" + "sshl v17.16b, v3.16b, v9.16b\n" + "and v7.16b, v7.16b, v29.16b\n" + "ldr q24, [x25, #0x60]\n" + "ldr q16, [x25, #0x70]\n" + "sshl v22.16b, v13.16b, v9.16b\n" + "and v3.16b, v3.16b, v29.16b\n" + "ldr d21, [x20, #0x0]\n" + "ldr d12, [x25, #-0x8]\n" + ".inst 0x4f85e284 // sdot v4.4s, v20.16b, v5.4b[0]\n" + ".inst 0x4fa5e281 // sdot v1.4s, v20.16b, v5.4b[1]\n" + ".inst 0x4f85ea80 // sdot v0.4s, v20.16b, v5.4b[2]\n" + ".inst 0x4fa5ea9e // sdot v30.4s, v20.16b, v5.4b[3]\n" + "sshl v9.16b, v28.16b, v9.16b\n" + "subs x21, x21, #0x1\n" + "and v13.16b, v13.16b, v29.16b\n" + "and v28.16b, v28.16b, v29.16b\n" + "add x25, x25, #0x88\n" + "add x24, x24, #0x48\n" + "fcvtl v21.4s, v21.4h\n" + "fcvtl v12.4s, v12.4h\n" + ".inst 0x4f82e224 // sdot v4.4s, v17.16b, v2.4b[0]\n" + ".inst 0x4fa2e221 // sdot v1.4s, v17.16b, v2.4b[1]\n" + ".inst 0x4f82ea20 // sdot v0.4s, v17.16b, v2.4b[2]\n" + ".inst 0x4fa2ea3e // sdot v30.4s, v17.16b, v2.4b[3]\n" + "fmul v11.4s, v21.4s, v12.s[0]\n" + "fmul v23.4s, v21.4s, v12.s[1]\n" + "fmul v17.4s, v21.4s, v12.s[2]\n" + ".inst 0x4f9fe2c4 // sdot v4.4s, v22.16b, v31.4b[0]\n" + "fmul v6.4s, v21.4s, v12.s[3]\n" + ".inst 0x4fbfe2c1 // sdot v1.4s, v22.16b, v31.4b[1]\n" + ".inst 0x4f9feac0 // sdot v0.4s, v22.16b, v31.4b[2]\n" + ".inst 0x4fbfeade // sdot v30.4s, v22.16b, v31.4b[3]\n" + ".inst 0x4f9be124 // sdot v4.4s, v9.16b, v27.4b[0]\n" + ".inst 0x4fbbe121 // sdot v1.4s, v9.16b, v27.4b[1]\n" + ".inst 0x4f9be920 // sdot v0.4s, v9.16b, v27.4b[2]\n" + ".inst 0x4fbbe93e // sdot v30.4s, v9.16b, v27.4b[3]\n" + ".inst 0x4f9ae0e4 // sdot v4.4s, v7.16b, v26.4b[0]\n" + ".inst 0x4fbae0e1 // sdot v1.4s, v7.16b, v26.4b[1]\n" + ".inst 
0x4f9ae8e0 // sdot v0.4s, v7.16b, v26.4b[2]\n" + ".inst 0x4fbae8fe // sdot v30.4s, v7.16b, v26.4b[3]\n" + ".inst 0x4f99e064 // sdot v4.4s, v3.16b, v25.4b[0]\n" + ".inst 0x4fb9e061 // sdot v1.4s, v3.16b, v25.4b[1]\n" + ".inst 0x4f99e860 // sdot v0.4s, v3.16b, v25.4b[2]\n" + ".inst 0x4fb9e87e // sdot v30.4s, v3.16b, v25.4b[3]\n" + ".inst 0x4f98e1a4 // sdot v4.4s, v13.16b, v24.4b[0]\n" + ".inst 0x4fb8e1a1 // sdot v1.4s, v13.16b, v24.4b[1]\n" + ".inst 0x4f98e9a0 // sdot v0.4s, v13.16b, v24.4b[2]\n" + ".inst 0x4fb8e9be // sdot v30.4s, v13.16b, v24.4b[3]\n" + ".inst 0x4f90e384 // sdot v4.4s, v28.16b, v16.4b[0]\n" + ".inst 0x4fb0e381 // sdot v1.4s, v28.16b, v16.4b[1]\n" + ".inst 0x4f90eb80 // sdot v0.4s, v28.16b, v16.4b[2]\n" + ".inst 0x4fb0eb9e // sdot v30.4s, v28.16b, v16.4b[3]\n" + "scvtf v4.4s, v4.4s, #0x4\n" + "scvtf v1.4s, v1.4s, #0x4\n" + "scvtf v0.4s, v0.4s, #0x4\n" + "fmla v15.4s, v4.4s, v11.4s\n" + "scvtf v30.4s, v30.4s, #0x4\n" + "fmla v19.4s, v1.4s, v23.4s\n" + "fmla v18.4s, v0.4s, v17.4s\n" + "fmla v14.4s, v30.4s, v6.4s\n" + "bgt 7b\n" + "mov x20, %x[res_ptr]\n" + "cmp x10, #0x1\n" + "str q15, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "ble 8f\n" + "cmp x10, #0x2\n" + "str q19, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "ble 8f\n" + "cmp x10, #0x3\n" + "str q18, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "ble 8f\n" + "str q14, [x20, #0x0]\n" + "8:" // Row tail: Accumulator store skip + "subs x23, x23, #0x4\n" + "add %x[res_ptr], %x[res_ptr], #0x10\n" + "bne 6b\n" + "subs x10, x10, #0x4\n" + "add %x[a_ptr], %x[a_ptr], x9\n" + "mov %x[res_ptr], x22\n" + "bgt 5b\n" + "9:" // Row tail: Row loop skip + : [a_ptr] "+&r" (a_ptr), [res_ptr] "+&r" (res_ptr) + : [b_ptr] "r" (b_ptr), [nr] "r" (nr), [nb] "r" (nb), [res_stride] "r" (res_stride), [nc] "r" (nc) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", 
"v25", "v26", "v27", "v28", "v29", "v30", "v31", "x9", "x10", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + return; #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) { float sumf[4][4]; @@ -1125,7 +1118,7 @@ void ggml_gemm_q4_0_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -1160,404 +1153,402 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo UNUSED(blocklen); #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) - if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) { - const void * b_ptr = vx; - const void * a_ptr = vy; - float * res_ptr = s; - size_t res_stride = bs * sizeof(float); - - __asm__ __volatile__( - "mov x10, %x[nr]\n" - "mov x9, #0x88\n" - "cmp x10, #0x10\n" - "mul x9, %x[nb], x9\n" - "blt 4f\n" - "1:" // Row loop - "add x28, %x[b_ptr], #0x8\n" - "mov x27, %x[nc]\n" - "add x26, %x[res_ptr], %x[res_stride], LSL #4\n" - "2:" // Column loop - "add x25, %x[a_ptr], #0x8\n" - "movi v2.16b, #0x0\n" - "movi v10.16b, #0x0\n" - "mov x24, %x[nb]\n" - "add x23, x25, x9\n" - "movi v12.16b, #0x0\n" - "movi v28.16b, #0x0\n" - "add x22, x23, x9\n" - "movi v11.16b, #0x0\n" - "movi v13.16b, #0x0\n" - "add x21, x22, x9\n" - "movi v22.16b, #0x0\n" - "movi v23.16b, #0x0\n" - "movi v25.16b, #0x0\n" - "movi v5.16b, #0x0\n" - "movi v7.16b, #0x0\n" - "movi v4.16b, #0x0\n" - "movi v6.16b, #0x0\n" - "movi v30.16b, #0x0\n" - "movi v24.16b, #0x0\n" - "movi v14.16b, #0x0\n" - "3:" // Block loop - "ldr q21, [x28, #0x0]\n" - "ldr 
q16, [x28, #0x10]\n" - "movi v1.16b, #0x4\n" - "movi v19.4s, #0x0\n" - "ldr q27, [x25, #0x0]\n" - "ldr q15, [x25, #0x10]\n" - "movi v26.4s, #0x0\n" - "movi v18.4s, #0x0\n" - "ldr q29, [x28, #0x20]\n" - "ldr q3, [x28, #0x30]\n" - "movi v17.4s, #0x0\n" - "movi v0.16b, #0xf0\n" - "ldr d20, [x25, #-0x8]\n" - "ldr d9, [x23, #-0x8]\n" - "sshl v8.16b, v21.16b, v1.16b\n" - "sshl v31.16b, v16.16b, v1.16b\n" - "and v21.16b, v21.16b, v0.16b\n" - "and v16.16b, v16.16b, v0.16b\n" - "sub x20, x28, #0x8\n" - "subs x24, x24, #0x1\n" - "add x28, x28, #0x48\n" - ".inst 0x4e88a773 // smmla v19.4s, v27.16b, v8.16b\n" - ".inst 0x4e9fa77a // smmla v26.4s, v27.16b, v31.16b\n" - "ldr q27, [x25, #0x20]\n" - ".inst 0x4e88a5f2 // smmla v18.4s, v15.16b, v8.16b\n" - ".inst 0x4e9fa5f1 // smmla v17.4s, v15.16b, v31.16b\n" - "sshl v15.16b, v29.16b, v1.16b\n" - "sshl v1.16b, v3.16b, v1.16b\n" - "and v29.16b, v29.16b, v0.16b\n" - "and v3.16b, v3.16b, v0.16b\n" - "ldr q0, [x25, #0x30]\n" - "fcvtl v20.4s, v20.4h\n" - ".inst 0x4e8fa773 // smmla v19.4s, v27.16b, v15.16b\n" - "fcvtl v9.4s, v9.4h\n" - ".inst 0x4e81a77a // smmla v26.4s, v27.16b, v1.16b\n" - "ldr q27, [x25, #0x40]\n" - ".inst 0x4e8fa412 // smmla v18.4s, v0.16b, v15.16b\n" - ".inst 0x4e81a411 // smmla v17.4s, v0.16b, v1.16b\n" - "ldr q0, [x25, #0x50]\n" - ".inst 0x4e95a773 // smmla v19.4s, v27.16b, v21.16b\n" - ".inst 0x4e90a77a // smmla v26.4s, v27.16b, v16.16b\n" - "ldr q27, [x25, #0x60]\n" - ".inst 0x4e95a412 // smmla v18.4s, v0.16b, v21.16b\n" - ".inst 0x4e90a411 // smmla v17.4s, v0.16b, v16.16b\n" - "ldr q0, [x25, #0x70]\n" - "add x25, x25, #0x88\n" - ".inst 0x4e9da773 // smmla v19.4s, v27.16b, v29.16b\n" - ".inst 0x4e83a77a // smmla v26.4s, v27.16b, v3.16b\n" - "ldr d27, [x20, #0x0]\n" - ".inst 0x4e9da412 // smmla v18.4s, v0.16b, v29.16b\n" - ".inst 0x4e83a411 // smmla v17.4s, v0.16b, v3.16b\n" - "fcvtl v27.4s, v27.4h\n" - "uzp1 v0.2d, v19.2d, v26.2d\n" - "uzp2 v26.2d, v19.2d, v26.2d\n" - "fmul v19.4s, v27.4s, v20.s[0]\n" - "scvtf 
v0.4s, v0.4s, #0x4\n" - "scvtf v26.4s, v26.4s, #0x4\n" - "fmla v2.4s, v0.4s, v19.4s\n" - "ldr q19, [x23, #0x0]\n" - "uzp1 v0.2d, v18.2d, v17.2d\n" - "uzp2 v18.2d, v18.2d, v17.2d\n" - "fmul v17.4s, v27.4s, v20.s[1]\n" - "scvtf v0.4s, v0.4s, #0x4\n" - "scvtf v18.4s, v18.4s, #0x4\n" - "fmla v10.4s, v26.4s, v17.4s\n" - "ldr q17, [x23, #0x10]\n" - "fmul v26.4s, v27.4s, v20.s[2]\n" - "fmul v20.4s, v27.4s, v20.s[3]\n" - "fmla v12.4s, v0.4s, v26.4s\n" - "ldr d0, [x22, #-0x8]\n" - "ldr d26, [x21, #-0x8]\n" - "fcvtl v0.4s, v0.4h\n" - "fmla v28.4s, v18.4s, v20.4s\n" - "movi v20.4s, #0x0\n" - "movi v18.4s, #0x0\n" - ".inst 0x4e88a674 // smmla v20.4s, v19.16b, v8.16b\n" - ".inst 0x4e9fa672 // smmla v18.4s, v19.16b, v31.16b\n" - "ldr q19, [x23, #0x20]\n" - "fcvtl v26.4s, v26.4h\n" - ".inst 0x4e8fa674 // smmla v20.4s, v19.16b, v15.16b\n" - ".inst 0x4e81a672 // smmla v18.4s, v19.16b, v1.16b\n" - "ldr q19, [x23, #0x40]\n" - ".inst 0x4e95a674 // smmla v20.4s, v19.16b, v21.16b\n" - ".inst 0x4e90a672 // smmla v18.4s, v19.16b, v16.16b\n" - "ldr q19, [x23, #0x60]\n" - ".inst 0x4e9da674 // smmla v20.4s, v19.16b, v29.16b\n" - ".inst 0x4e83a672 // smmla v18.4s, v19.16b, v3.16b\n" - "uzp1 v19.2d, v20.2d, v18.2d\n" - "scvtf v19.4s, v19.4s, #0x4\n" - "uzp2 v20.2d, v20.2d, v18.2d\n" - "fmul v18.4s, v27.4s, v9.s[0]\n" - "scvtf v20.4s, v20.4s, #0x4\n" - "fmla v11.4s, v19.4s, v18.4s\n" - "ldr q18, [x22, #0x0]\n" - "fmul v19.4s, v27.4s, v9.s[1]\n" - "fmla v13.4s, v20.4s, v19.4s\n" - "movi v19.4s, #0x0\n" - "movi v20.4s, #0x0\n" - ".inst 0x4e88a633 // smmla v19.4s, v17.16b, v8.16b\n" - ".inst 0x4e9fa634 // smmla v20.4s, v17.16b, v31.16b\n" - "ldr q17, [x23, #0x30]\n" - ".inst 0x4e8fa633 // smmla v19.4s, v17.16b, v15.16b\n" - ".inst 0x4e81a634 // smmla v20.4s, v17.16b, v1.16b\n" - "ldr q17, [x23, #0x50]\n" - ".inst 0x4e95a633 // smmla v19.4s, v17.16b, v21.16b\n" - ".inst 0x4e90a634 // smmla v20.4s, v17.16b, v16.16b\n" - "ldr q17, [x23, #0x70]\n" - "add x23, x23, #0x88\n" - ".inst 0x4e9da633 // smmla 
v19.4s, v17.16b, v29.16b\n" - ".inst 0x4e83a634 // smmla v20.4s, v17.16b, v3.16b\n" - "uzp1 v17.2d, v19.2d, v20.2d\n" - "scvtf v17.4s, v17.4s, #0x4\n" - "uzp2 v20.2d, v19.2d, v20.2d\n" - "fmul v19.4s, v27.4s, v9.s[2]\n" - "fmul v9.4s, v27.4s, v9.s[3]\n" - "scvtf v20.4s, v20.4s, #0x4\n" - "fmla v22.4s, v17.4s, v19.4s\n" - "ldr q17, [x22, #0x10]\n" - "movi v19.4s, #0x0\n" - ".inst 0x4e88a653 // smmla v19.4s, v18.16b, v8.16b\n" - "fmla v23.4s, v20.4s, v9.4s\n" - "movi v20.4s, #0x0\n" - "movi v9.4s, #0x0\n" - ".inst 0x4e9fa654 // smmla v20.4s, v18.16b, v31.16b\n" - "ldr q18, [x22, #0x20]\n" - ".inst 0x4e88a629 // smmla v9.4s, v17.16b, v8.16b\n" - ".inst 0x4e8fa653 // smmla v19.4s, v18.16b, v15.16b\n" - ".inst 0x4e81a654 // smmla v20.4s, v18.16b, v1.16b\n" - "ldr q18, [x22, #0x40]\n" - ".inst 0x4e95a653 // smmla v19.4s, v18.16b, v21.16b\n" - ".inst 0x4e90a654 // smmla v20.4s, v18.16b, v16.16b\n" - "ldr q18, [x22, #0x60]\n" - ".inst 0x4e9da653 // smmla v19.4s, v18.16b, v29.16b\n" - ".inst 0x4e83a654 // smmla v20.4s, v18.16b, v3.16b\n" - "movi v18.4s, #0x0\n" - ".inst 0x4e9fa632 // smmla v18.4s, v17.16b, v31.16b\n" - "ldr q17, [x22, #0x30]\n" - ".inst 0x4e8fa629 // smmla v9.4s, v17.16b, v15.16b\n" - ".inst 0x4e81a632 // smmla v18.4s, v17.16b, v1.16b\n" - "ldr q17, [x22, #0x50]\n" - ".inst 0x4e95a629 // smmla v9.4s, v17.16b, v21.16b\n" - ".inst 0x4e90a632 // smmla v18.4s, v17.16b, v16.16b\n" - "ldr q17, [x22, #0x70]\n" - "add x22, x22, #0x88\n" - ".inst 0x4e9da629 // smmla v9.4s, v17.16b, v29.16b\n" - ".inst 0x4e83a632 // smmla v18.4s, v17.16b, v3.16b\n" - "uzp1 v17.2d, v19.2d, v20.2d\n" - "uzp2 v20.2d, v19.2d, v20.2d\n" - "fmul v19.4s, v27.4s, v0.s[0]\n" - "scvtf v17.4s, v17.4s, #0x4\n" - "scvtf v20.4s, v20.4s, #0x4\n" - "fmla v25.4s, v17.4s, v19.4s\n" - "ldr q19, [x21, #0x0]\n" - "fmul v17.4s, v27.4s, v0.s[1]\n" - "fmla v5.4s, v20.4s, v17.4s\n" - "ldr q17, [x21, #0x10]\n" - "uzp1 v20.2d, v9.2d, v18.2d\n" - "uzp2 v9.2d, v9.2d, v18.2d\n" - "fmul v18.4s, v27.4s, v0.s[2]\n" 
- "fmul v0.4s, v27.4s, v0.s[3]\n" - "scvtf v20.4s, v20.4s, #0x4\n" - "scvtf v9.4s, v9.4s, #0x4\n" - "fmla v7.4s, v20.4s, v18.4s\n" - "movi v20.4s, #0x0\n" - "movi v18.4s, #0x0\n" - ".inst 0x4e88a674 // smmla v20.4s, v19.16b, v8.16b\n" - ".inst 0x4e9fa672 // smmla v18.4s, v19.16b, v31.16b\n" - "ldr q19, [x21, #0x20]\n" - "fmla v4.4s, v9.4s, v0.4s\n" - "movi v9.4s, #0x0\n" - "movi v0.4s, #0x0\n" - ".inst 0x4e88a629 // smmla v9.4s, v17.16b, v8.16b\n" - "fmul v8.4s, v27.4s, v26.s[0]\n" - ".inst 0x4e9fa620 // smmla v0.4s, v17.16b, v31.16b\n" - "ldr q17, [x21, #0x30]\n" - ".inst 0x4e8fa674 // smmla v20.4s, v19.16b, v15.16b\n" - "fmul v31.4s, v27.4s, v26.s[1]\n" - ".inst 0x4e81a672 // smmla v18.4s, v19.16b, v1.16b\n" - "ldr q19, [x21, #0x40]\n" - ".inst 0x4e8fa629 // smmla v9.4s, v17.16b, v15.16b\n" - "fmul v15.4s, v27.4s, v26.s[2]\n" - "fmul v27.4s, v27.4s, v26.s[3]\n" - ".inst 0x4e81a620 // smmla v0.4s, v17.16b, v1.16b\n" - "ldr q1, [x21, #0x50]\n" - ".inst 0x4e95a674 // smmla v20.4s, v19.16b, v21.16b\n" - ".inst 0x4e90a672 // smmla v18.4s, v19.16b, v16.16b\n" - "ldr q26, [x21, #0x60]\n" - ".inst 0x4e95a429 // smmla v9.4s, v1.16b, v21.16b\n" - ".inst 0x4e90a420 // smmla v0.4s, v1.16b, v16.16b\n" - "ldr q21, [x21, #0x70]\n" - "add x21, x21, #0x88\n" - ".inst 0x4e9da754 // smmla v20.4s, v26.16b, v29.16b\n" - ".inst 0x4e83a752 // smmla v18.4s, v26.16b, v3.16b\n" - ".inst 0x4e9da6a9 // smmla v9.4s, v21.16b, v29.16b\n" - ".inst 0x4e83a6a0 // smmla v0.4s, v21.16b, v3.16b\n" - "uzp1 v29.2d, v20.2d, v18.2d\n" - "uzp2 v21.2d, v20.2d, v18.2d\n" - "scvtf v29.4s, v29.4s, #0x4\n" - "uzp1 v18.2d, v9.2d, v0.2d\n" - "uzp2 v16.2d, v9.2d, v0.2d\n" - "scvtf v21.4s, v21.4s, #0x4\n" - "fmla v6.4s, v29.4s, v8.4s\n" - "scvtf v18.4s, v18.4s, #0x4\n" - "scvtf v16.4s, v16.4s, #0x4\n" - "fmla v30.4s, v21.4s, v31.4s\n" - "fmla v24.4s, v18.4s, v15.4s\n" - "fmla v14.4s, v16.4s, v27.4s\n" - "bgt 3b\n" - "mov x20, %x[res_ptr]\n" - "subs x27, x27, #0x4\n" - "add %x[res_ptr], %x[res_ptr], #0x10\n" - 
"str q2, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q10, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q12, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q28, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q11, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q13, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q22, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q23, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q25, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q5, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q7, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q4, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q6, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q30, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q24, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "str q14, [x20, #0x0]\n" - "bne 2b\n" - "mov x20, #0x4\n" - "sub x10, x10, #0x10\n" - "cmp x10, #0x10\n" - "mov %x[res_ptr], x26\n" - "madd %x[a_ptr], x20, x9, %x[a_ptr]\n" - "bge 1b\n" - "4:" // Row loop skip - "cbz x10, 9f\n" - "5:" // Row tail: Row loop - "add x24, %x[b_ptr], #0x8\n" - "mov x23, %x[nc]\n" - "add x22, %x[res_ptr], %x[res_stride], LSL #2\n" - "6:" // Row tail: Column loop - "movi v2.16b, #0x0\n" - "movi v10.16b, #0x0\n" - "add x25, %x[a_ptr], #0x8\n" - "mov x21, %x[nb]\n" - "movi v12.16b, #0x0\n" - "movi v28.16b, #0x0\n" - "7:" // Row tail: Block loop - "ldr q6, [x24, #0x0]\n" - "ldr q5, [x24, #0x10]\n" - "movi v17.16b, #0x4\n" - "movi v8.4s, #0x0\n" - "ldr q4, [x25, #0x0]\n" - "ldr q13, [x25, #0x10]\n" - "movi v27.4s, #0x0\n" - "movi v0.4s, #0x0\n" - "ldr q31, [x24, #0x20]\n" - "ldr q14, [x24, #0x30]\n" - "movi v29.4s, #0x0\n" - "movi v22.16b, #0xf0\n" - "ldr q11, [x25, #0x20]\n" - "ldr q23, [x25, #0x30]\n" - "sshl v21.16b, v6.16b, v17.16b\n" - "sshl v16.16b, v5.16b, v17.16b\n" - "ldr q20, [x25, #0x40]\n" - "ldr q26, [x25, #0x50]\n" - "and 
v6.16b, v6.16b, v22.16b\n" - "and v5.16b, v5.16b, v22.16b\n" - "ldr q25, [x25, #0x60]\n" - "ldr q3, [x25, #0x70]\n" - "sshl v19.16b, v31.16b, v17.16b\n" - "sshl v18.16b, v14.16b, v17.16b\n" - "ldr d17, [x25, #-0x8]\n" - ".inst 0x4e95a488 // smmla v8.4s, v4.16b, v21.16b\n" - ".inst 0x4e90a49b // smmla v27.4s, v4.16b, v16.16b\n" - "and v31.16b, v31.16b, v22.16b\n" - ".inst 0x4e95a5a0 // smmla v0.4s, v13.16b, v21.16b\n" - ".inst 0x4e90a5bd // smmla v29.4s, v13.16b, v16.16b\n" - "and v14.16b, v14.16b, v22.16b\n" - "sub x20, x24, #0x8\n" - "ldr d16, [x20, #0x0]\n" - "subs x21, x21, #0x1\n" - "add x25, x25, #0x88\n" - "fcvtl v17.4s, v17.4h\n" - "add x24, x24, #0x48\n" - ".inst 0x4e93a568 // smmla v8.4s, v11.16b, v19.16b\n" - ".inst 0x4e92a57b // smmla v27.4s, v11.16b, v18.16b\n" - ".inst 0x4e93a6e0 // smmla v0.4s, v23.16b, v19.16b\n" - ".inst 0x4e92a6fd // smmla v29.4s, v23.16b, v18.16b\n" - "fcvtl v16.4s, v16.4h\n" - ".inst 0x4e86a688 // smmla v8.4s, v20.16b, v6.16b\n" - ".inst 0x4e85a69b // smmla v27.4s, v20.16b, v5.16b\n" - "fmul v23.4s, v16.4s, v17.s[0]\n" - "fmul v21.4s, v16.4s, v17.s[1]\n" - "fmul v1.4s, v16.4s, v17.s[2]\n" - "fmul v20.4s, v16.4s, v17.s[3]\n" - ".inst 0x4e86a740 // smmla v0.4s, v26.16b, v6.16b\n" - ".inst 0x4e85a75d // smmla v29.4s, v26.16b, v5.16b\n" - ".inst 0x4e9fa728 // smmla v8.4s, v25.16b, v31.16b\n" - ".inst 0x4e8ea73b // smmla v27.4s, v25.16b, v14.16b\n" - ".inst 0x4e9fa460 // smmla v0.4s, v3.16b, v31.16b\n" - ".inst 0x4e8ea47d // smmla v29.4s, v3.16b, v14.16b\n" - "uzp1 v19.2d, v8.2d, v27.2d\n" - "uzp2 v18.2d, v8.2d, v27.2d\n" - "scvtf v19.4s, v19.4s, #0x4\n" - "uzp1 v17.2d, v0.2d, v29.2d\n" - "uzp2 v16.2d, v0.2d, v29.2d\n" - "scvtf v18.4s, v18.4s, #0x4\n" - "fmla v2.4s, v19.4s, v23.4s\n" - "scvtf v17.4s, v17.4s, #0x4\n" - "scvtf v16.4s, v16.4s, #0x4\n" - "fmla v10.4s, v18.4s, v21.4s\n" - "fmla v12.4s, v17.4s, v1.4s\n" - "fmla v28.4s, v16.4s, v20.4s\n" - "bgt 7b\n" - "mov x20, %x[res_ptr]\n" - "cmp x10, #0x1\n" - "str q2, [x20, #0x0]\n" - 
"add x20, x20, %x[res_stride]\n" - "ble 8f\n" - "cmp x10, #0x2\n" - "str q10, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "ble 8f\n" - "cmp x10, #0x3\n" - "str q12, [x20, #0x0]\n" - "add x20, x20, %x[res_stride]\n" - "ble 8f\n" - "str q28, [x20, #0x0]\n" - "8:" // Row tail: Accumulator store skip - "subs x23, x23, #0x4\n" - "add %x[res_ptr], %x[res_ptr], #0x10\n" - "bne 6b\n" - "subs x10, x10, #0x4\n" - "add %x[a_ptr], %x[a_ptr], x9\n" - "mov %x[res_ptr], x22\n" - "bgt 5b\n" - "9:" // Row tail: Row loop skip - : [a_ptr] "+&r" (a_ptr), [res_ptr] "+&r" (res_ptr) - : [b_ptr] "r" (b_ptr), [nr] "r" (nr), [nb] "r" (nb), [res_stride] "r" (res_stride), [nc] "r" (nc) - : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "x9", "x10", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" - ); - return; - } + const void * b_ptr = vx; + const void * a_ptr = vy; + float * res_ptr = s; + size_t res_stride = bs * sizeof(float); + + __asm__ __volatile__( + "mov x10, %x[nr]\n" + "mov x9, #0x88\n" + "cmp x10, #0x10\n" + "mul x9, %x[nb], x9\n" + "blt 4f\n" + "1:" // Row loop + "add x28, %x[b_ptr], #0x8\n" + "mov x27, %x[nc]\n" + "add x26, %x[res_ptr], %x[res_stride], LSL #4\n" + "2:" // Column loop + "add x25, %x[a_ptr], #0x8\n" + "movi v2.16b, #0x0\n" + "movi v10.16b, #0x0\n" + "mov x24, %x[nb]\n" + "add x23, x25, x9\n" + "movi v12.16b, #0x0\n" + "movi v28.16b, #0x0\n" + "add x22, x23, x9\n" + "movi v11.16b, #0x0\n" + "movi v13.16b, #0x0\n" + "add x21, x22, x9\n" + "movi v22.16b, #0x0\n" + "movi v23.16b, #0x0\n" + "movi v25.16b, #0x0\n" + "movi v5.16b, #0x0\n" + "movi v7.16b, #0x0\n" + "movi v4.16b, #0x0\n" + "movi v6.16b, #0x0\n" + "movi v30.16b, #0x0\n" + "movi v24.16b, #0x0\n" + "movi v14.16b, #0x0\n" + "3:" // Block loop + "ldr q21, [x28, #0x0]\n" + "ldr q16, [x28, #0x10]\n" + "movi 
v1.16b, #0x4\n" + "movi v19.4s, #0x0\n" + "ldr q27, [x25, #0x0]\n" + "ldr q15, [x25, #0x10]\n" + "movi v26.4s, #0x0\n" + "movi v18.4s, #0x0\n" + "ldr q29, [x28, #0x20]\n" + "ldr q3, [x28, #0x30]\n" + "movi v17.4s, #0x0\n" + "movi v0.16b, #0xf0\n" + "ldr d20, [x25, #-0x8]\n" + "ldr d9, [x23, #-0x8]\n" + "sshl v8.16b, v21.16b, v1.16b\n" + "sshl v31.16b, v16.16b, v1.16b\n" + "and v21.16b, v21.16b, v0.16b\n" + "and v16.16b, v16.16b, v0.16b\n" + "sub x20, x28, #0x8\n" + "subs x24, x24, #0x1\n" + "add x28, x28, #0x48\n" + ".inst 0x4e88a773 // smmla v19.4s, v27.16b, v8.16b\n" + ".inst 0x4e9fa77a // smmla v26.4s, v27.16b, v31.16b\n" + "ldr q27, [x25, #0x20]\n" + ".inst 0x4e88a5f2 // smmla v18.4s, v15.16b, v8.16b\n" + ".inst 0x4e9fa5f1 // smmla v17.4s, v15.16b, v31.16b\n" + "sshl v15.16b, v29.16b, v1.16b\n" + "sshl v1.16b, v3.16b, v1.16b\n" + "and v29.16b, v29.16b, v0.16b\n" + "and v3.16b, v3.16b, v0.16b\n" + "ldr q0, [x25, #0x30]\n" + "fcvtl v20.4s, v20.4h\n" + ".inst 0x4e8fa773 // smmla v19.4s, v27.16b, v15.16b\n" + "fcvtl v9.4s, v9.4h\n" + ".inst 0x4e81a77a // smmla v26.4s, v27.16b, v1.16b\n" + "ldr q27, [x25, #0x40]\n" + ".inst 0x4e8fa412 // smmla v18.4s, v0.16b, v15.16b\n" + ".inst 0x4e81a411 // smmla v17.4s, v0.16b, v1.16b\n" + "ldr q0, [x25, #0x50]\n" + ".inst 0x4e95a773 // smmla v19.4s, v27.16b, v21.16b\n" + ".inst 0x4e90a77a // smmla v26.4s, v27.16b, v16.16b\n" + "ldr q27, [x25, #0x60]\n" + ".inst 0x4e95a412 // smmla v18.4s, v0.16b, v21.16b\n" + ".inst 0x4e90a411 // smmla v17.4s, v0.16b, v16.16b\n" + "ldr q0, [x25, #0x70]\n" + "add x25, x25, #0x88\n" + ".inst 0x4e9da773 // smmla v19.4s, v27.16b, v29.16b\n" + ".inst 0x4e83a77a // smmla v26.4s, v27.16b, v3.16b\n" + "ldr d27, [x20, #0x0]\n" + ".inst 0x4e9da412 // smmla v18.4s, v0.16b, v29.16b\n" + ".inst 0x4e83a411 // smmla v17.4s, v0.16b, v3.16b\n" + "fcvtl v27.4s, v27.4h\n" + "uzp1 v0.2d, v19.2d, v26.2d\n" + "uzp2 v26.2d, v19.2d, v26.2d\n" + "fmul v19.4s, v27.4s, v20.s[0]\n" + "scvtf v0.4s, v0.4s, #0x4\n" + "scvtf 
v26.4s, v26.4s, #0x4\n" + "fmla v2.4s, v0.4s, v19.4s\n" + "ldr q19, [x23, #0x0]\n" + "uzp1 v0.2d, v18.2d, v17.2d\n" + "uzp2 v18.2d, v18.2d, v17.2d\n" + "fmul v17.4s, v27.4s, v20.s[1]\n" + "scvtf v0.4s, v0.4s, #0x4\n" + "scvtf v18.4s, v18.4s, #0x4\n" + "fmla v10.4s, v26.4s, v17.4s\n" + "ldr q17, [x23, #0x10]\n" + "fmul v26.4s, v27.4s, v20.s[2]\n" + "fmul v20.4s, v27.4s, v20.s[3]\n" + "fmla v12.4s, v0.4s, v26.4s\n" + "ldr d0, [x22, #-0x8]\n" + "ldr d26, [x21, #-0x8]\n" + "fcvtl v0.4s, v0.4h\n" + "fmla v28.4s, v18.4s, v20.4s\n" + "movi v20.4s, #0x0\n" + "movi v18.4s, #0x0\n" + ".inst 0x4e88a674 // smmla v20.4s, v19.16b, v8.16b\n" + ".inst 0x4e9fa672 // smmla v18.4s, v19.16b, v31.16b\n" + "ldr q19, [x23, #0x20]\n" + "fcvtl v26.4s, v26.4h\n" + ".inst 0x4e8fa674 // smmla v20.4s, v19.16b, v15.16b\n" + ".inst 0x4e81a672 // smmla v18.4s, v19.16b, v1.16b\n" + "ldr q19, [x23, #0x40]\n" + ".inst 0x4e95a674 // smmla v20.4s, v19.16b, v21.16b\n" + ".inst 0x4e90a672 // smmla v18.4s, v19.16b, v16.16b\n" + "ldr q19, [x23, #0x60]\n" + ".inst 0x4e9da674 // smmla v20.4s, v19.16b, v29.16b\n" + ".inst 0x4e83a672 // smmla v18.4s, v19.16b, v3.16b\n" + "uzp1 v19.2d, v20.2d, v18.2d\n" + "scvtf v19.4s, v19.4s, #0x4\n" + "uzp2 v20.2d, v20.2d, v18.2d\n" + "fmul v18.4s, v27.4s, v9.s[0]\n" + "scvtf v20.4s, v20.4s, #0x4\n" + "fmla v11.4s, v19.4s, v18.4s\n" + "ldr q18, [x22, #0x0]\n" + "fmul v19.4s, v27.4s, v9.s[1]\n" + "fmla v13.4s, v20.4s, v19.4s\n" + "movi v19.4s, #0x0\n" + "movi v20.4s, #0x0\n" + ".inst 0x4e88a633 // smmla v19.4s, v17.16b, v8.16b\n" + ".inst 0x4e9fa634 // smmla v20.4s, v17.16b, v31.16b\n" + "ldr q17, [x23, #0x30]\n" + ".inst 0x4e8fa633 // smmla v19.4s, v17.16b, v15.16b\n" + ".inst 0x4e81a634 // smmla v20.4s, v17.16b, v1.16b\n" + "ldr q17, [x23, #0x50]\n" + ".inst 0x4e95a633 // smmla v19.4s, v17.16b, v21.16b\n" + ".inst 0x4e90a634 // smmla v20.4s, v17.16b, v16.16b\n" + "ldr q17, [x23, #0x70]\n" + "add x23, x23, #0x88\n" + ".inst 0x4e9da633 // smmla v19.4s, v17.16b, v29.16b\n" + 
".inst 0x4e83a634 // smmla v20.4s, v17.16b, v3.16b\n" + "uzp1 v17.2d, v19.2d, v20.2d\n" + "scvtf v17.4s, v17.4s, #0x4\n" + "uzp2 v20.2d, v19.2d, v20.2d\n" + "fmul v19.4s, v27.4s, v9.s[2]\n" + "fmul v9.4s, v27.4s, v9.s[3]\n" + "scvtf v20.4s, v20.4s, #0x4\n" + "fmla v22.4s, v17.4s, v19.4s\n" + "ldr q17, [x22, #0x10]\n" + "movi v19.4s, #0x0\n" + ".inst 0x4e88a653 // smmla v19.4s, v18.16b, v8.16b\n" + "fmla v23.4s, v20.4s, v9.4s\n" + "movi v20.4s, #0x0\n" + "movi v9.4s, #0x0\n" + ".inst 0x4e9fa654 // smmla v20.4s, v18.16b, v31.16b\n" + "ldr q18, [x22, #0x20]\n" + ".inst 0x4e88a629 // smmla v9.4s, v17.16b, v8.16b\n" + ".inst 0x4e8fa653 // smmla v19.4s, v18.16b, v15.16b\n" + ".inst 0x4e81a654 // smmla v20.4s, v18.16b, v1.16b\n" + "ldr q18, [x22, #0x40]\n" + ".inst 0x4e95a653 // smmla v19.4s, v18.16b, v21.16b\n" + ".inst 0x4e90a654 // smmla v20.4s, v18.16b, v16.16b\n" + "ldr q18, [x22, #0x60]\n" + ".inst 0x4e9da653 // smmla v19.4s, v18.16b, v29.16b\n" + ".inst 0x4e83a654 // smmla v20.4s, v18.16b, v3.16b\n" + "movi v18.4s, #0x0\n" + ".inst 0x4e9fa632 // smmla v18.4s, v17.16b, v31.16b\n" + "ldr q17, [x22, #0x30]\n" + ".inst 0x4e8fa629 // smmla v9.4s, v17.16b, v15.16b\n" + ".inst 0x4e81a632 // smmla v18.4s, v17.16b, v1.16b\n" + "ldr q17, [x22, #0x50]\n" + ".inst 0x4e95a629 // smmla v9.4s, v17.16b, v21.16b\n" + ".inst 0x4e90a632 // smmla v18.4s, v17.16b, v16.16b\n" + "ldr q17, [x22, #0x70]\n" + "add x22, x22, #0x88\n" + ".inst 0x4e9da629 // smmla v9.4s, v17.16b, v29.16b\n" + ".inst 0x4e83a632 // smmla v18.4s, v17.16b, v3.16b\n" + "uzp1 v17.2d, v19.2d, v20.2d\n" + "uzp2 v20.2d, v19.2d, v20.2d\n" + "fmul v19.4s, v27.4s, v0.s[0]\n" + "scvtf v17.4s, v17.4s, #0x4\n" + "scvtf v20.4s, v20.4s, #0x4\n" + "fmla v25.4s, v17.4s, v19.4s\n" + "ldr q19, [x21, #0x0]\n" + "fmul v17.4s, v27.4s, v0.s[1]\n" + "fmla v5.4s, v20.4s, v17.4s\n" + "ldr q17, [x21, #0x10]\n" + "uzp1 v20.2d, v9.2d, v18.2d\n" + "uzp2 v9.2d, v9.2d, v18.2d\n" + "fmul v18.4s, v27.4s, v0.s[2]\n" + "fmul v0.4s, v27.4s, 
v0.s[3]\n" + "scvtf v20.4s, v20.4s, #0x4\n" + "scvtf v9.4s, v9.4s, #0x4\n" + "fmla v7.4s, v20.4s, v18.4s\n" + "movi v20.4s, #0x0\n" + "movi v18.4s, #0x0\n" + ".inst 0x4e88a674 // smmla v20.4s, v19.16b, v8.16b\n" + ".inst 0x4e9fa672 // smmla v18.4s, v19.16b, v31.16b\n" + "ldr q19, [x21, #0x20]\n" + "fmla v4.4s, v9.4s, v0.4s\n" + "movi v9.4s, #0x0\n" + "movi v0.4s, #0x0\n" + ".inst 0x4e88a629 // smmla v9.4s, v17.16b, v8.16b\n" + "fmul v8.4s, v27.4s, v26.s[0]\n" + ".inst 0x4e9fa620 // smmla v0.4s, v17.16b, v31.16b\n" + "ldr q17, [x21, #0x30]\n" + ".inst 0x4e8fa674 // smmla v20.4s, v19.16b, v15.16b\n" + "fmul v31.4s, v27.4s, v26.s[1]\n" + ".inst 0x4e81a672 // smmla v18.4s, v19.16b, v1.16b\n" + "ldr q19, [x21, #0x40]\n" + ".inst 0x4e8fa629 // smmla v9.4s, v17.16b, v15.16b\n" + "fmul v15.4s, v27.4s, v26.s[2]\n" + "fmul v27.4s, v27.4s, v26.s[3]\n" + ".inst 0x4e81a620 // smmla v0.4s, v17.16b, v1.16b\n" + "ldr q1, [x21, #0x50]\n" + ".inst 0x4e95a674 // smmla v20.4s, v19.16b, v21.16b\n" + ".inst 0x4e90a672 // smmla v18.4s, v19.16b, v16.16b\n" + "ldr q26, [x21, #0x60]\n" + ".inst 0x4e95a429 // smmla v9.4s, v1.16b, v21.16b\n" + ".inst 0x4e90a420 // smmla v0.4s, v1.16b, v16.16b\n" + "ldr q21, [x21, #0x70]\n" + "add x21, x21, #0x88\n" + ".inst 0x4e9da754 // smmla v20.4s, v26.16b, v29.16b\n" + ".inst 0x4e83a752 // smmla v18.4s, v26.16b, v3.16b\n" + ".inst 0x4e9da6a9 // smmla v9.4s, v21.16b, v29.16b\n" + ".inst 0x4e83a6a0 // smmla v0.4s, v21.16b, v3.16b\n" + "uzp1 v29.2d, v20.2d, v18.2d\n" + "uzp2 v21.2d, v20.2d, v18.2d\n" + "scvtf v29.4s, v29.4s, #0x4\n" + "uzp1 v18.2d, v9.2d, v0.2d\n" + "uzp2 v16.2d, v9.2d, v0.2d\n" + "scvtf v21.4s, v21.4s, #0x4\n" + "fmla v6.4s, v29.4s, v8.4s\n" + "scvtf v18.4s, v18.4s, #0x4\n" + "scvtf v16.4s, v16.4s, #0x4\n" + "fmla v30.4s, v21.4s, v31.4s\n" + "fmla v24.4s, v18.4s, v15.4s\n" + "fmla v14.4s, v16.4s, v27.4s\n" + "bgt 3b\n" + "mov x20, %x[res_ptr]\n" + "subs x27, x27, #0x4\n" + "add %x[res_ptr], %x[res_ptr], #0x10\n" + "str q2, [x20, #0x0]\n" + 
"add x20, x20, %x[res_stride]\n" + "str q10, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q12, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q28, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q11, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q13, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q22, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q23, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q25, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q5, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q7, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q4, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q6, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q30, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q24, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "str q14, [x20, #0x0]\n" + "bne 2b\n" + "mov x20, #0x4\n" + "sub x10, x10, #0x10\n" + "cmp x10, #0x10\n" + "mov %x[res_ptr], x26\n" + "madd %x[a_ptr], x20, x9, %x[a_ptr]\n" + "bge 1b\n" + "4:" // Row loop skip + "cbz x10, 9f\n" + "5:" // Row tail: Row loop + "add x24, %x[b_ptr], #0x8\n" + "mov x23, %x[nc]\n" + "add x22, %x[res_ptr], %x[res_stride], LSL #2\n" + "6:" // Row tail: Column loop + "movi v2.16b, #0x0\n" + "movi v10.16b, #0x0\n" + "add x25, %x[a_ptr], #0x8\n" + "mov x21, %x[nb]\n" + "movi v12.16b, #0x0\n" + "movi v28.16b, #0x0\n" + "7:" // Row tail: Block loop + "ldr q6, [x24, #0x0]\n" + "ldr q5, [x24, #0x10]\n" + "movi v17.16b, #0x4\n" + "movi v8.4s, #0x0\n" + "ldr q4, [x25, #0x0]\n" + "ldr q13, [x25, #0x10]\n" + "movi v27.4s, #0x0\n" + "movi v0.4s, #0x0\n" + "ldr q31, [x24, #0x20]\n" + "ldr q14, [x24, #0x30]\n" + "movi v29.4s, #0x0\n" + "movi v22.16b, #0xf0\n" + "ldr q11, [x25, #0x20]\n" + "ldr q23, [x25, #0x30]\n" + "sshl v21.16b, v6.16b, v17.16b\n" + "sshl v16.16b, v5.16b, v17.16b\n" + "ldr q20, [x25, #0x40]\n" + "ldr q26, [x25, #0x50]\n" + "and v6.16b, v6.16b, 
v22.16b\n" + "and v5.16b, v5.16b, v22.16b\n" + "ldr q25, [x25, #0x60]\n" + "ldr q3, [x25, #0x70]\n" + "sshl v19.16b, v31.16b, v17.16b\n" + "sshl v18.16b, v14.16b, v17.16b\n" + "ldr d17, [x25, #-0x8]\n" + ".inst 0x4e95a488 // smmla v8.4s, v4.16b, v21.16b\n" + ".inst 0x4e90a49b // smmla v27.4s, v4.16b, v16.16b\n" + "and v31.16b, v31.16b, v22.16b\n" + ".inst 0x4e95a5a0 // smmla v0.4s, v13.16b, v21.16b\n" + ".inst 0x4e90a5bd // smmla v29.4s, v13.16b, v16.16b\n" + "and v14.16b, v14.16b, v22.16b\n" + "sub x20, x24, #0x8\n" + "ldr d16, [x20, #0x0]\n" + "subs x21, x21, #0x1\n" + "add x25, x25, #0x88\n" + "fcvtl v17.4s, v17.4h\n" + "add x24, x24, #0x48\n" + ".inst 0x4e93a568 // smmla v8.4s, v11.16b, v19.16b\n" + ".inst 0x4e92a57b // smmla v27.4s, v11.16b, v18.16b\n" + ".inst 0x4e93a6e0 // smmla v0.4s, v23.16b, v19.16b\n" + ".inst 0x4e92a6fd // smmla v29.4s, v23.16b, v18.16b\n" + "fcvtl v16.4s, v16.4h\n" + ".inst 0x4e86a688 // smmla v8.4s, v20.16b, v6.16b\n" + ".inst 0x4e85a69b // smmla v27.4s, v20.16b, v5.16b\n" + "fmul v23.4s, v16.4s, v17.s[0]\n" + "fmul v21.4s, v16.4s, v17.s[1]\n" + "fmul v1.4s, v16.4s, v17.s[2]\n" + "fmul v20.4s, v16.4s, v17.s[3]\n" + ".inst 0x4e86a740 // smmla v0.4s, v26.16b, v6.16b\n" + ".inst 0x4e85a75d // smmla v29.4s, v26.16b, v5.16b\n" + ".inst 0x4e9fa728 // smmla v8.4s, v25.16b, v31.16b\n" + ".inst 0x4e8ea73b // smmla v27.4s, v25.16b, v14.16b\n" + ".inst 0x4e9fa460 // smmla v0.4s, v3.16b, v31.16b\n" + ".inst 0x4e8ea47d // smmla v29.4s, v3.16b, v14.16b\n" + "uzp1 v19.2d, v8.2d, v27.2d\n" + "uzp2 v18.2d, v8.2d, v27.2d\n" + "scvtf v19.4s, v19.4s, #0x4\n" + "uzp1 v17.2d, v0.2d, v29.2d\n" + "uzp2 v16.2d, v0.2d, v29.2d\n" + "scvtf v18.4s, v18.4s, #0x4\n" + "fmla v2.4s, v19.4s, v23.4s\n" + "scvtf v17.4s, v17.4s, #0x4\n" + "scvtf v16.4s, v16.4s, #0x4\n" + "fmla v10.4s, v18.4s, v21.4s\n" + "fmla v12.4s, v17.4s, v1.4s\n" + "fmla v28.4s, v16.4s, v20.4s\n" + "bgt 7b\n" + "mov x20, %x[res_ptr]\n" + "cmp x10, #0x1\n" + "str q2, [x20, #0x0]\n" + "add x20, x20, 
%x[res_stride]\n" + "ble 8f\n" + "cmp x10, #0x2\n" + "str q10, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "ble 8f\n" + "cmp x10, #0x3\n" + "str q12, [x20, #0x0]\n" + "add x20, x20, %x[res_stride]\n" + "ble 8f\n" + "str q28, [x20, #0x0]\n" + "8:" // Row tail: Accumulator store skip + "subs x23, x23, #0x4\n" + "add %x[res_ptr], %x[res_ptr], #0x10\n" + "bne 6b\n" + "subs x10, x10, #0x4\n" + "add %x[a_ptr], %x[a_ptr], x9\n" + "mov %x[res_ptr], x22\n" + "bgt 5b\n" + "9:" // Row tail: Row loop skip + : [a_ptr] "+&r" (a_ptr), [res_ptr] "+&r" (res_ptr) + : [b_ptr] "r" (b_ptr), [nr] "r" (nr), [nb] "r" (nb), [res_stride] "r" (res_stride), [nc] "r" (nc) + : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "x9", "x10", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28" + ); + return; #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_MATMUL_INT8) float sumf[4][4]; int sumi; @@ -1580,7 +1571,7 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -1615,7 +1606,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo #if ! ((defined(_MSC_VER)) && ! 
defined(__clang__)) && defined(__aarch64__) #if defined(__ARM_FEATURE_SVE) && defined(__ARM_FEATURE_MATMUL_INT8) - if (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0) { + if (ggml_cpu_get_sve_cnt() == QK8_0) { const void * b_ptr = vx; const void * a_ptr = vy; float * res_ptr = s; @@ -2049,7 +2040,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -2083,59 +2074,57 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const UNUSED(blocklen); #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) && defined(__ARM_FEATURE_DOTPROD) - if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { - const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl); + const int8x16_t kvalues = vld1q_s8(kvalues_iq4nl); - for (int y = 0; y < nr / 4; y++) { - const block_q8_0x4 * a_ptr = (const block_q8_0x4 *) vy + (y * nb); - for (int x = 0; x < nc / ncols_interleaved; x++) { - const block_iq4_nlx4 * b_ptr = (const block_iq4_nlx4 *) vx + (x * nb); + for (int y = 0; y < nr / 4; y++) { + const block_q8_0x4 * a_ptr = (const block_q8_0x4 *) vy + (y * nb); + for (int x = 0; x < nc / ncols_interleaved; x++) { + const block_iq4_nlx4 * b_ptr = (const block_iq4_nlx4 *) vx + (x * nb); - float32x4_t sumf[4]; - for (int m = 0; m < 4; m++) { - sumf[m] = vdupq_n_f32(0); - } + float32x4_t sumf[4]; + for (int m = 0; m < 4; m++) { + sumf[m] = vdupq_n_f32(0); + } - for (int l = 0; l < nb; l++) { - float32x4_t a_d = vcvt_f32_f16(vld1_f16((const float16_t *)a_ptr[l].d)); - float32x4_t b_d = vcvt_f32_f16(vld1_f16((const float16_t 
*)b_ptr[l].d)); - - int32x4_t sumi_0 = vdupq_n_s32(0); - int32x4_t sumi_1 = vdupq_n_s32(0); - int32x4_t sumi_2 = vdupq_n_s32(0); - int32x4_t sumi_3 = vdupq_n_s32(0); - - for (int k = 0; k < 4; k++) { - int8x16_t a_0 = vld1q_s8(a_ptr[l].qs + 16 * k + 0); - int8x16_t a_1 = vld1q_s8(a_ptr[l].qs + 16 * k + 64); - - uint8x16_t b = vld1q_u8(b_ptr[l].qs + 16 * k); - int8x16_t b_hi = vqtbl1q_s8(kvalues, b >> 4); - int8x16_t b_lo = vqtbl1q_s8(kvalues, b & 0xF); - - sumi_0 = vdotq_laneq_s32(sumi_0, b_lo, a_0, 0); - sumi_1 = vdotq_laneq_s32(sumi_1, b_lo, a_0, 1); - sumi_2 = vdotq_laneq_s32(sumi_2, b_lo, a_0, 2); - sumi_3 = vdotq_laneq_s32(sumi_3, b_lo, a_0, 3); - sumi_0 = vdotq_laneq_s32(sumi_0, b_hi, a_1, 0); - sumi_1 = vdotq_laneq_s32(sumi_1, b_hi, a_1, 1); - sumi_2 = vdotq_laneq_s32(sumi_2, b_hi, a_1, 2); - sumi_3 = vdotq_laneq_s32(sumi_3, b_hi, a_1, 3); - } + for (int l = 0; l < nb; l++) { + float32x4_t a_d = vcvt_f32_f16(vld1_f16((const float16_t *)a_ptr[l].d)); + float32x4_t b_d = vcvt_f32_f16(vld1_f16((const float16_t *)b_ptr[l].d)); - sumf[0] = vmlaq_f32(sumf[0], vmulq_laneq_f32(b_d, a_d, 0), vcvtq_f32_s32(sumi_0)); - sumf[1] = vmlaq_f32(sumf[1], vmulq_laneq_f32(b_d, a_d, 1), vcvtq_f32_s32(sumi_1)); - sumf[2] = vmlaq_f32(sumf[2], vmulq_laneq_f32(b_d, a_d, 2), vcvtq_f32_s32(sumi_2)); - sumf[3] = vmlaq_f32(sumf[3], vmulq_laneq_f32(b_d, a_d, 3), vcvtq_f32_s32(sumi_3)); + int32x4_t sumi_0 = vdupq_n_s32(0); + int32x4_t sumi_1 = vdupq_n_s32(0); + int32x4_t sumi_2 = vdupq_n_s32(0); + int32x4_t sumi_3 = vdupq_n_s32(0); + + for (int k = 0; k < 4; k++) { + int8x16_t a_0 = vld1q_s8(a_ptr[l].qs + 16 * k + 0); + int8x16_t a_1 = vld1q_s8(a_ptr[l].qs + 16 * k + 64); + + uint8x16_t b = vld1q_u8(b_ptr[l].qs + 16 * k); + int8x16_t b_hi = vqtbl1q_s8(kvalues, b >> 4); + int8x16_t b_lo = vqtbl1q_s8(kvalues, b & 0xF); + + sumi_0 = vdotq_laneq_s32(sumi_0, b_lo, a_0, 0); + sumi_1 = vdotq_laneq_s32(sumi_1, b_lo, a_0, 1); + sumi_2 = vdotq_laneq_s32(sumi_2, b_lo, a_0, 2); + sumi_3 = 
vdotq_laneq_s32(sumi_3, b_lo, a_0, 3); + sumi_0 = vdotq_laneq_s32(sumi_0, b_hi, a_1, 0); + sumi_1 = vdotq_laneq_s32(sumi_1, b_hi, a_1, 1); + sumi_2 = vdotq_laneq_s32(sumi_2, b_hi, a_1, 2); + sumi_3 = vdotq_laneq_s32(sumi_3, b_hi, a_1, 3); } - for (int m = 0; m < 4; m++) { - vst1q_f32(s + (y * 4 + m) * bs + x * 4, sumf[m]); - } + sumf[0] = vmlaq_f32(sumf[0], vmulq_laneq_f32(b_d, a_d, 0), vcvtq_f32_s32(sumi_0)); + sumf[1] = vmlaq_f32(sumf[1], vmulq_laneq_f32(b_d, a_d, 1), vcvtq_f32_s32(sumi_1)); + sumf[2] = vmlaq_f32(sumf[2], vmulq_laneq_f32(b_d, a_d, 2), vcvtq_f32_s32(sumi_2)); + sumf[3] = vmlaq_f32(sumf[3], vmulq_laneq_f32(b_d, a_d, 3), vcvtq_f32_s32(sumi_3)); + } + + for (int m = 0; m < 4; m++) { + vst1q_f32(s + (y * 4 + m) * bs + x * 4, sumf[m]); } } - return; } + return; #endif // #if ! ((defined(_MSC_VER)) && ! defined(__clang__)) && defined(__aarch64__) && defined(__ARM_NEON) { float sumf[4][4]; @@ -2159,7 +2148,7 @@ void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])); } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } diff --git a/ggml/src/ggml-cpu/arch/loongarch/quants.c b/ggml/src/ggml-cpu/arch/loongarch/quants.c index f2ea965724a3d..9e33fb3228633 100644 --- a/ggml/src/ggml-cpu/arch/loongarch/quants.c +++ b/ggml/src/ggml-cpu/arch/loongarch/quants.c @@ -3,6 +3,7 @@ #include "ggml-quants.h" #include "ggml-impl.h" #include "ggml-cpu.h" +#include "simd-mappings.h" #include "../../quants.h" #include "../../ggml-cpu-impl.h" @@ -474,7 +475,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i // Quantize these floats const float d = max_scalar / 127.f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); 
const float id = ( max_scalar != 0.0f ) ? 127.f / max_scalar : 0.0f; const __m256 mul = (__m256)__lasx_xvreplfr2vr_s( id ); @@ -548,7 +549,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i // Quantize these floats const float d = max_scalar / 127.f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); const float id = ( max_scalar != 0.0f ) ? 127.f / max_scalar : 0.0f; const __m256 mul = __lasx_xvreplfr2vr_s( id ); @@ -576,7 +577,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i // Compute the sum of the quants and set y[i].s const __m128i s0 = __lsx_vadd_w(__lsx_vadd_w(ni0, ni1), __lsx_vadd_w(ni2, ni3)); const __m128i s1 = __lsx_vadd_w(__lsx_vadd_w(ni4, ni5), __lsx_vadd_w(ni6, ni7)); - y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_4(__lsx_vadd_w(s0, s1))); + y[i].s = GGML_CPU_FP32_TO_FP16(d * hsum_i32_4(__lsx_vadd_w(s0, s1))); // Convert int32 to int16 ni0 = lsx_packs_w( ni0, ni1 ); @@ -667,7 +668,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { /* Compute combined scale for the block */ - const __m256 d = __lasx_xvreplfr2vr_s( GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d) ); + const __m256 d = __lasx_xvreplfr2vr_s( GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d) ); __m256i qx = bytes_from_nibbles_32(x[ib].qs); @@ -699,7 +700,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi for (; ib + 1 < nb; ib += 2) { // Compute combined scale for the block 0 and 1 - const __m128 d_0_1 = (__m128)__lsx_vreplgr2vr_w( GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d) ); + const __m128 d_0_1 = (__m128)__lsx_vreplgr2vr_w( GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d) ); const __m128i tmp_0_1 = __lsx_vld((const __m128i *)x[ib].qs, 0); @@ -717,7 +718,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi 
//_mm_prefetch(&y[ib] + 2 * sizeof(block_q8_0), _MM_HINT_T0); // Compute combined scale for the block 2 and 3 - const __m128 d_2_3 = (__m128)__lsx_vreplgr2vr_w( GGML_FP16_TO_FP32(x[ib + 1].d) * GGML_FP16_TO_FP32(y[ib + 1].d) ); + const __m128 d_2_3 = (__m128)__lsx_vreplgr2vr_w( GGML_CPU_FP16_TO_FP32(x[ib + 1].d) * GGML_CPU_FP16_TO_FP32(y[ib + 1].d) ); const __m128i tmp_2_3 = __lsx_vld((const __m128i *)x[ib + 1].qs, 0); @@ -766,7 +767,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; @@ -797,10 +798,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { - const float d0 = GGML_FP16_TO_FP32(x[ib].d); - const float d1 = GGML_FP16_TO_FP32(y[ib].d); + const float d0 = GGML_CPU_FP16_TO_FP32(x[ib].d); + const float d1 = GGML_CPU_FP16_TO_FP32(y[ib].d); - summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s); + summs += GGML_CPU_FP16_TO_FP32(x[ib].m) * GGML_CPU_FP16_TO_FP32(y[ib].s); const __m256 d0v = __lasx_xvreplfr2vr_s( d0 ); const __m256 d1v = __lasx_xvreplfr2vr_s( d1 ); @@ -834,7 +835,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -865,7 +866,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { /* Compute combined scale for the block */ - const __m256 d = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); //FIXME + const 
__m256 d = __lasx_xvreplfr2vr_s(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); //FIXME __m256i qx = bytes_from_nibbles_32(x[ib].qs); __m256i bxhi = bytes_from_bits_32(x[ib].qh); @@ -902,7 +903,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; @@ -934,16 +935,16 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { - const __m256 dx = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[ib].d)); + const __m256 dx = __lasx_xvreplfr2vr_s(GGML_CPU_FP16_TO_FP32(x[ib].d)); - summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s); + summs += GGML_CPU_FP16_TO_FP32(x[ib].m) * GGML_CPU_FP16_TO_FP32(y[ib].s); __m256i qx = bytes_from_nibbles_32(x[ib].qs); __m256i bxhi = bytes_from_bits_32(x[ib].qh); bxhi = __lasx_xvand_v(bxhi, __lasx_xvreplgr2vr_b(0x10)); qx = __lasx_xvor_v(qx, bxhi); - const __m256 dy = __lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(y[ib].d)); + const __m256 dy = __lasx_xvreplfr2vr_s(GGML_CPU_FP16_TO_FP32(y[ib].d)); const __m256i qy = __lasx_xvld((const __m256i *)y[ib].qs, 0); const __m256 q = mul_sum_us8_pairs_float(qx, qy); @@ -973,7 +974,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -1003,7 +1004,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { // Compute combined scale for the block - const __m256 d = 
__lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); + const __m256 d = __lasx_xvreplfr2vr_s(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); __m256i qx = __lasx_xvld((const __m256i *)x[ib].qs, 0); __m256i qy = __lasx_xvld((const __m256i *)y[ib].qs, 0); @@ -1023,7 +1024,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; @@ -1047,8 +1048,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const uint8_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1116,8 +1117,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi summs += y[i].bsums[j] * (sc[j] >> 4); } - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int isum = 0; int is = 0; @@ -1170,7 +1171,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q3 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; // Set up scales @@ -1294,7 +1295,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 
8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1330,8 +1331,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); memcpy(utmp, x[i].scales, 12); utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); @@ -1438,9 +1439,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1477,8 +1478,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi const uint8_t * GGML_RESTRICT q5 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); memcpy(utmp, x[i].scales, 12); utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); @@ -1593,9 +1594,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * 
y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1624,7 +1625,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q4 = x[i].ql; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -1713,7 +1714,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1780,7 +1781,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const __m256 accumf = (__m256)__lasx_xvldi(0); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; __m256i sumi1 = __lasx_xvldi(0); @@ -1820,7 +1821,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; int32_t bsum = 0; @@ -1895,7 +1896,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v __m256 accumf = (__m256)__lasx_xvldi(0); for (int i = 0; i < nb; ++i) { - const 
float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1980,7 +1981,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const uint8_t * GGML_RESTRICT sc = x[i].scales; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -2049,7 +2050,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo __m256 accumf = (__m256)__lasx_xvldi(0); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8); @@ -2108,7 +2109,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0; for (int i = 0; i < nb; i++) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const int8_t * q8 = y[i].qs; const uint8_t * qs = x[i].qs; const uint8_t * qh = x[i].qh; @@ -2168,7 +2169,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const __m256 accumf = (__m256)__lasx_xvldi(0); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -2213,7 +2214,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = 
GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -2279,7 +2280,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo __m256 accumf = (__m256)__lasx_xvldi(0); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs; @@ -2340,7 +2341,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint8_t * GGML_RESTRICT signs = x[i].signs; @@ -2451,7 +2452,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo + (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * (qh[ib+1] & 0x8000 ? 
-1 : 1) * ls2; } - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); accum = __lasx_xvfmadd_s(__lasx_xvreplfr2vr_s(d), __lasx_xvffint_s_w(sumi), accum); accum1 += d * sumi1; } @@ -2484,7 +2485,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qs += 4; } - sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); + sumf += GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); } *s = sumf; @@ -2530,9 +2531,9 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v const __m256i p16_2 = mul_add_epi8(q4b_2, q8b_2); const __m256i p_1 = lasx_madd_h(p16_1, mone); const __m256i p_2 = lasx_madd_h(p16_2, mone); - accum1 = __lasx_xvfmadd_s(__lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(y[ib + 0].d)*GGML_FP16_TO_FP32(x[ib + 0].d)), + accum1 = __lasx_xvfmadd_s(__lasx_xvreplfr2vr_s(GGML_CPU_FP16_TO_FP32(y[ib + 0].d)*GGML_CPU_FP16_TO_FP32(x[ib + 0].d)), __lasx_xvffint_s_w(p_1), accum1); - accum2 = __lasx_xvfmadd_s(__lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(y[ib + 1].d)*GGML_FP16_TO_FP32(x[ib + 1].d)), + accum2 = __lasx_xvfmadd_s(__lasx_xvreplfr2vr_s(GGML_CPU_FP16_TO_FP32(y[ib + 1].d)*GGML_CPU_FP16_TO_FP32(x[ib + 1].d)), __lasx_xvffint_s_w(p_2), accum2); } @@ -2540,7 +2541,7 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v #endif for (; ib < nb; ++ib) { - const float d = GGML_FP16_TO_FP32(y[ib].d)*GGML_FP16_TO_FP32(x[ib].d); + const float d = GGML_CPU_FP16_TO_FP32(y[ib].d)*GGML_CPU_FP16_TO_FP32(x[ib].d); int sumi1 = 0, sumi2 = 0; for (int j = 0; j < QK4_NL/2; ++j) { sumi1 += y[ib].qs[j+ 0] * kvalues_iq4nl[x[ib].qs[j] & 0xf]; @@ -2595,7 +2596,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v sumi1 = __lasx_xvadd_w(p_1, sumi1); sumi2 = __lasx_xvadd_w(p_2, sumi2); } - accum = __lasx_xvfmadd_s(__lasx_xvreplfr2vr_s(GGML_FP16_TO_FP32(x[ibl].d)*y[ibl].d), + accum = 
__lasx_xvfmadd_s(__lasx_xvreplfr2vr_s(GGML_CPU_FP16_TO_FP32(x[ibl].d)*y[ibl].d), __lasx_xvffint_s_w(__lasx_xvadd_w(sumi1, sumi2)), accum); } @@ -2604,7 +2605,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v #else float sumf = 0; for (int ibl = 0; ibl < nb; ++ibl) { - const float d4d8 = GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d; + const float d4d8 = GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d; uint16_t h = x[ibl].scales_h; const uint8_t * qs = x[ibl].qs; const int8_t * q8 = y[ibl].qs; diff --git a/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp b/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp new file mode 100644 index 0000000000000..fedd6430278c2 --- /dev/null +++ b/ggml/src/ggml-cpu/arch/powerpc/cpu-feats.cpp @@ -0,0 +1,82 @@ +# include "ggml-backend-impl.h" + +#if defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) + +#if defined(__linux__) +#include <sys/auxv.h> +#endif + +#include <string> + +struct powerpc_features { + std::string platform = ""; + int power_version = -1; + + bool has_vsx = false; + + powerpc_features() { +#if defined(__linux__) + unsigned long auxval = getauxval(AT_PLATFORM); + if (auxval) { + platform = std::string(reinterpret_cast<const char*>(auxval)); + // TBD: Do systems exist that return this in uppercase? 
+ if (platform.substr(0, 5) == "power") { + // Extract a numeric suffix, if one exists + int vpos = -1; + for (int i = platform.length() - 1; i >= 0; i--) { + if (std::isdigit(platform[i])) { + vpos = i; + } else { + break; + } + } + if (vpos > -1) { + power_version = std::stoi(platform.substr(vpos)); + } + } + } +#endif + if (power_version >= 9) { + has_vsx = true; + } + } +}; + +static int ggml_backend_cpu_powerpc_score() { + int score = 1; + powerpc_features pf; + +// Platform scores +#if defined(GGML_USE_POWER7) + if (pf.power_version < 7) { return 0; } + score += 1<<1; +#endif +#if defined(GGML_USE_POWER8) + if (pf.power_version < 8) { return 0; } + score += 1<<2; +#endif +#if defined(GGML_USE_POWER9) + if (pf.power_version < 9) { return 0; } + score += 1<<3; +#endif +#if defined(GGML_USE_POWER10) + if (pf.power_version < 10) { return 0; } + score += 1<<4; +#endif +#if defined(GGML_USE_POWER11) + if (pf.power_version < 11) { return 0; } + score += 1<<5; +#endif + +// Feature scores +#if defined(GGML_USE_VSX) + if (!pf.has_vsx) { return 0; } + score += 1<<6; +#endif + + return score; +} + +GGML_BACKEND_DL_SCORE_IMPL(ggml_backend_cpu_powerpc_score) + +#endif // defined(__powerpc64__) || defined(__ppc64__) || defined(__PPC64__) diff --git a/ggml/src/ggml-cpu/arch/powerpc/quants.c b/ggml/src/ggml-cpu/arch/powerpc/quants.c index ce4e47a863994..053d5cbdc7bd8 100644 --- a/ggml/src/ggml-cpu/arch/powerpc/quants.c +++ b/ggml/src/ggml-cpu/arch/powerpc/quants.c @@ -3,6 +3,7 @@ #include "ggml-quants.h" #include "ggml-impl.h" #include "ggml-cpu.h" +#include "simd-mappings.h" #include "../../quants.h" #include "../../ggml-cpu-impl.h" @@ -67,7 +68,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float id = d ? 
1.0f/d : 0.0f; const vector float vid = vec_splats(id); - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); for (int j = 0; j < 8; j++) { const vector float v = vec_round(vec_mul(srcv[j], vid)); @@ -112,7 +113,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float id = d ? 1.0f/d : 0.0f; const vector float vid = vec_splats(id); - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); vector int accv = vec_splats(0); @@ -127,7 +128,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i accv = vec_add(accv, vec_sld(accv, accv, 4)); accv = vec_add(accv, vec_sld(accv, accv, 8)); - y[i].s = GGML_FP32_TO_FP16(d * vec_extract(accv, 0)); + y[i].s = GGML_CPU_FP32_TO_FP16(d * vec_extract(accv, 0)); } #else @@ -170,8 +171,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi __builtin_prefetch(x[ib].qs, 0, 1); __builtin_prefetch(y[ib].qs, 0, 1); - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[ib].d)); - vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[ib].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d)); + vector float vyd = vec_splats(GGML_CPU_FP16_TO_FP32(y[ib].d)); vector float vd = vec_mul(vxd, vyd); vector signed char qxs = (vector signed char)vec_xl( 0, x[ib].qs); @@ -214,7 +215,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; @@ -249,12 +250,12 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi __builtin_prefetch(x[ib].qs, 0, 1); __builtin_prefetch(y[ib].qs, 0, 1); - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[ib].d)); - vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[ib].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d)); + 
vector float vyd = vec_splats(GGML_CPU_FP16_TO_FP32(y[ib].d)); vector float vd = vec_mul(vxd, vyd); - vector float vxmin = vec_splats(GGML_FP16_TO_FP32(x[ib].m)); - vector float vys = {GGML_FP16_TO_FP32(y[ib].s), 0.0f, 0.0f, 0.0f}; + vector float vxmin = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].m)); + vector float vys = {GGML_CPU_FP16_TO_FP32(y[ib].s), 0.0f, 0.0f, 0.0f}; vsumf0 = vec_madd(vxmin, vys, vsumf0); vector signed char qxs = (vector signed char)vec_xl( 0, x[ib].qs); @@ -291,7 +292,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -326,8 +327,8 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi __builtin_prefetch(x[ib].qs, 0, 1); __builtin_prefetch(y[ib].qs, 0, 1); - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[ib].d)); - vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[ib].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d)); + vector float vyd = vec_splats(GGML_CPU_FP16_TO_FP32(y[ib].d)); vector float vd = vec_mul(vxd, vyd); vector signed long long aux64x2_0 = {(uint64_t)(table_b2b_1[x[ib].qh[0]]), (uint64_t)(table_b2b_1[x[ib].qh[1]])}; @@ -379,7 +380,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; @@ -415,12 +416,12 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi __builtin_prefetch(x[ib].qs, 0, 1); __builtin_prefetch(y[ib].qs, 0, 1); - vector float vxd = 
vec_splats(GGML_FP16_TO_FP32(x[ib].d)); - vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[ib].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d)); + vector float vyd = vec_splats(GGML_CPU_FP16_TO_FP32(y[ib].d)); vector float vd = vec_mul(vxd, vyd); - vector float vxmin = vec_splats(GGML_FP16_TO_FP32(x[ib].m)); - vector float vys = {GGML_FP16_TO_FP32(y[ib].s), 0.f, 0.f, 0.f}; + vector float vxmin = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].m)); + vector float vys = {GGML_CPU_FP16_TO_FP32(y[ib].s), 0.f, 0.f, 0.f}; vsumf0 = vec_madd(vxmin, vys, vsumf0); vector unsigned long long aux64x2_0 = {(uint64_t)(table_b2b_0[x[ib].qh[0]]), (uint64_t)(table_b2b_0[x[ib].qh[1]])}; @@ -470,7 +471,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -502,8 +503,8 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi __builtin_prefetch(x[ib].qs, 0, 1); __builtin_prefetch(y[ib].qs, 0, 1); - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[ib].d)); - vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[ib].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d)); + vector float vyd = vec_splats(GGML_CPU_FP16_TO_FP32(y[ib].d)); vector float vd = vec_mul(vxd, vyd); vector signed char q8x0 = vec_xl( 0, x[ib].qs); @@ -542,7 +543,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; @@ -574,11 +575,11 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, 
size_t bs, const voi vector float vsumf3 = vec_splats(0.0f); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); - vector float vxmin = vec_splats(GGML_FP16_TO_FP32(x[i].dmin)); + vector float vxmin = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].dmin)); vector float vdmin = vec_mul(vxmin, vyd); vector signed short q8ysums0 = vec_xl( 0, y[i].bsums); @@ -708,8 +709,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi summs += y[i].bsums[j] * (sc[j] >> 4); } - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int isum = 0; int is = 0; @@ -770,7 +771,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi vector float vsumf3 = vec_splats(0.0f); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -962,7 +963,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1005,11 +1006,11 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi vector float vsumf3 = vec_splats(0.0f); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = 
vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); - vector float vxmin = vec_splats(GGML_FP16_TO_FP32(x[i].dmin)); + vector float vxmin = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].dmin)); vector float vdmin = vec_mul(vxmin, vyd); vector signed short q8ysums0 = vec_xl( 0, y[i].bsums); @@ -1177,9 +1178,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1222,11 +1223,11 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi vector float vsumf3 = vec_splats(0.0f); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); - vector float vxmin = vec_splats(GGML_FP16_TO_FP32(x[i].dmin)); + vector float vxmin = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].dmin)); vector float vdmin = vec_mul(vxmin, vyd); UNUSED(kmask1); @@ -1394,9 +1395,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ 
-1432,7 +1433,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi vector float vsumf3 = vec_splats(0.0f); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -1591,7 +1592,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1659,7 +1660,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const const uint64_t * signs64 = (const uint64_t *)keven_signs_q2xs; for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -1742,7 +1743,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; int32_t bsum = 0; @@ -1790,7 +1791,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v const uint64_t * signs64 = (const uint64_t *)keven_signs_q2xs; for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -1871,7 +1872,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * 
GGML_RESTRICT s, size_t bs, const v float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const uint8_t * GGML_RESTRICT sc = x[i].scales; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1939,7 +1940,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo const vector signed char mask2 = (vector signed char)vec_xl( 0, k_mask2); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -2033,7 +2034,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0; for (int i = 0; i < nb; i++) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const int8_t * q8 = y[i].qs; const uint8_t * qs = x[i].qs; const uint8_t * qh = x[i].qh; @@ -2096,7 +2097,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vector float vsumf3 = vec_splats(0.0f); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -2176,7 +2177,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -2236,7 +2237,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo const vector signed char 
mask2 = (vector signed char)vec_xl( 0, k_mask2); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -2329,7 +2330,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint8_t * GGML_RESTRICT signs = x[i].signs; @@ -2394,7 +2395,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo vector float vsumf3 = vec_splats(0.0f); for (int i = 0; i < nb; ++i) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[i].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[i].d)); vector float vyd = vec_splats(y[i].d); vector float vd = vec_mul(vxd, vyd); @@ -2505,7 +2506,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qs += 4; } - sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); + sumf += GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); } *s = sumf; @@ -2546,8 +2547,8 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v __builtin_prefetch(y[ib].qs, 0, 1); - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[ib].d)); - vector float vyd = vec_splats(GGML_FP16_TO_FP32(y[ib].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d)); + vector float vyd = vec_splats(GGML_CPU_FP16_TO_FP32(y[ib].d)); vector float vd = vec_mul(vxd, vyd); vector signed char qxs = (vector signed char)vec_xl( 0, x[ib].qs); @@ -2582,7 +2583,7 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v #endif for (; ib < nb; ++ib) { - const float d = 
GGML_FP16_TO_FP32(y[ib].d)*GGML_FP16_TO_FP32(x[ib].d); + const float d = GGML_CPU_FP16_TO_FP32(y[ib].d)*GGML_CPU_FP16_TO_FP32(x[ib].d); int sumi1 = 0, sumi2 = 0; for (int j = 0; j < QK4_NL/2; ++j) { sumi1 += y[ib].qs[j+ 0] * kvalues_iq4nl[x[ib].qs[j] & 0xf]; @@ -2620,7 +2621,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v for (int ibl = 0; ibl < nb; ++ibl) { - vector float vxd = vec_splats(GGML_FP16_TO_FP32(x[ibl].d)); + vector float vxd = vec_splats(GGML_CPU_FP16_TO_FP32(x[ibl].d)); vector float vyd = vec_splats(y[ibl].d); vector float vd = vec_mul(vxd, vyd); @@ -2697,7 +2698,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v #else float sumf = 0; for (int ibl = 0; ibl < nb; ++ibl) { - const float d4d8 = GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d; + const float d4d8 = GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d; uint16_t h = x[ibl].scales_h; const uint8_t * qs = x[ibl].qs; const int8_t * q8 = y[ibl].qs; diff --git a/ggml/src/ggml-cpu/arch/riscv/quants.c b/ggml/src/ggml-cpu/arch/riscv/quants.c index 6f3aa94fbbe98..8b64d8adc48f4 100644 --- a/ggml/src/ggml-cpu/arch/riscv/quants.c +++ b/ggml/src/ggml-cpu/arch/riscv/quants.c @@ -3,6 +3,7 @@ #include "ggml-quants.h" #include "ggml-impl.h" #include "ggml-cpu.h" +#include "simd-mappings.h" #include "../../quants.h" #include "../../ggml-cpu-impl.h" @@ -45,7 +46,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float d = amax / ((1 << 7) - 1); const float id = d ? 1.0f/d : 0.0f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); vfloat32m8_t x0 = __riscv_vfmul_vf_f32m8(v_x, id, vl); @@ -85,7 +86,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float d = amax / ((1 << 7) - 1); const float id = d ? 
1.0f/d : 0.0f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); vfloat32m8_t x0 = __riscv_vfmul_vf_f32m8(v_x, id, vl); @@ -102,7 +103,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i // set y[i].s int sum = __riscv_vmv_x_s_i16m1_i16(vwrs); - y[i].s = GGML_FP32_TO_FP16(sum*d); + y[i].s = GGML_CPU_FP32_TO_FP16(sum*d); } #else @@ -160,7 +161,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi int sumi = __riscv_vmv_x_s_i32m1_i32(vs2); - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } #endif @@ -177,7 +178,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; @@ -225,7 +226,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi int sumi = __riscv_vmv_x_s_i32m1_i32(vs2); - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } #endif @@ -242,7 +243,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -293,7 +294,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi vint32m1_t sum = __riscv_vwredsum_vs_i16m4_i32m1(mul, zero, vl); int32_t sumi = 
__riscv_vmv_x_s_i32m1_i32(sum); - sumf += (GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } #endif @@ -316,7 +317,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; @@ -366,7 +367,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi vint32m1_t sum = __riscv_vwredsum_vs_i16m4_i32m1(mul, zero, vl); int32_t sumi = __riscv_vmv_x_s_i32m1_i32(sum); - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } #endif @@ -389,7 +390,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -427,7 +428,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi int sumi = __riscv_vmv_x_s_i32m1_i32(v_sum); - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } #endif @@ -438,7 +439,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += 
sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; @@ -465,8 +466,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi const uint8_t * q2 = x[i].qs; const int8_t * q8 = y[i].qs; const uint8_t * sc = x[i].scales; - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); uint8_t *patmp = atmp; int vsums; int tmp; @@ -569,8 +570,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi const int8_t * q8 = y[i].qs; const uint8_t * sc = x[i].scales; - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); size_t vl = 16; @@ -644,8 +645,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi const uint8_t * q2 = x[i].qs; const int8_t * q8 = y[i].qs; const uint8_t * sc = x[i].scales; - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); uint8_t *patmp = atmp; int vsums; int tmp; @@ -750,8 +751,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi summs += y[i].bsums[j] * (sc[j] >> 4); } - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int isum = 0; int is = 0; @@ -916,7 +917,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi q3 += 32; q8 += 
128; scale += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; sumf += d * isum; } @@ -1017,7 +1018,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; sumf += d*sum_t; @@ -1134,7 +1135,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi q3 += 32; q8 += 128; scale += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; sumf += d * isum; } break; @@ -1202,7 +1203,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1239,8 +1240,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int tmp, tmp2, sumi; __asm__ __volatile__( @@ -1361,8 +1362,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi size_t vl = 8; - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); vint16mf2_t q8sums_0 = __riscv_vlse16_v_i16mf2(y[i].bsums, 4, vl); vint16mf2_t q8sums_1 = __riscv_vlse16_v_i16mf2(y[i].bsums+1, 4, vl); @@ -1422,8 
+1423,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi break; case 128: for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int tmp, tmp2, sumi; __asm__ __volatile__( @@ -1580,9 +1581,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1627,8 +1628,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi const uint8_t * GGML_RESTRICT hm = x[i].qh; const int8_t * GGML_RESTRICT q8 = y[i].qs; - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; vint16m1_t q8sums_0 = __riscv_vlse16_v_i16m1(y[i].bsums, 4, vl); vint16m1_t q8sums_1 = __riscv_vlse16_v_i16m1(y[i].bsums+1, 4, vl); @@ -1749,9 +1750,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * 
sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1778,7 +1779,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * restrict q6 = x[i].ql; const uint8_t * restrict qh = x[i].qh; @@ -1862,7 +1863,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi case 256: for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q6 = x[i].ql; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -1943,7 +1944,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi case 128: for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * restrict q6 = x[i].ql; const uint8_t * restrict qh = x[i].qh; @@ -2058,7 +2059,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; diff --git a/ggml/src/ggml-cpu/arch/riscv/repack.cpp b/ggml/src/ggml-cpu/arch/riscv/repack.cpp index 0882b41024362..45c91a694820a 100644 --- a/ggml/src/ggml-cpu/arch/riscv/repack.cpp +++ b/ggml/src/ggml-cpu/arch/riscv/repack.cpp @@ -6,6 +6,7 @@ #include "ggml-impl.h" #include "ggml-cpu.h" #include "ggml-cpu-impl.h" +#include "simd-mappings.h" #include "traits.h" #include @@ -90,16 +91,16 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo const vfloat32m1_t facc = __riscv_vfcvt_f_x_v_f32m1(sumi_h8, vl / 4); // 
vector version needs Zvfhmin extension - const float a_scale = GGML_FP16_TO_FP32(a_ptr[l].d); + const float a_scale = GGML_CPU_FP16_TO_FP32(a_ptr[l].d); const float b_scales[8] = { - GGML_FP16_TO_FP32(b_ptr[l].d[0]), - GGML_FP16_TO_FP32(b_ptr[l].d[1]), - GGML_FP16_TO_FP32(b_ptr[l].d[2]), - GGML_FP16_TO_FP32(b_ptr[l].d[3]), - GGML_FP16_TO_FP32(b_ptr[l].d[4]), - GGML_FP16_TO_FP32(b_ptr[l].d[5]), - GGML_FP16_TO_FP32(b_ptr[l].d[6]), - GGML_FP16_TO_FP32(b_ptr[l].d[7]) + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[0]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[1]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[2]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[3]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[4]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[5]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[6]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[7]) }; const vfloat32m1_t b_scales_vec = __riscv_vle32_v_f32m1(b_scales, vl / 4); const vfloat32m1_t tmp1 = __riscv_vfmul_vf_f32m1(facc, a_scale, vl / 4); @@ -129,7 +130,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -181,20 +182,20 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo // vector version needs Zvfhmin extension const float a_scales[4] = { - GGML_FP16_TO_FP32(a_ptr[l].d[0]), - GGML_FP16_TO_FP32(a_ptr[l].d[1]), - GGML_FP16_TO_FP32(a_ptr[l].d[2]), - GGML_FP16_TO_FP32(a_ptr[l].d[3]) + GGML_CPU_FP16_TO_FP32(a_ptr[l].d[0]), + GGML_CPU_FP16_TO_FP32(a_ptr[l].d[1]), + GGML_CPU_FP16_TO_FP32(a_ptr[l].d[2]), + GGML_CPU_FP16_TO_FP32(a_ptr[l].d[3]) }; const float b_scales[8] = { - GGML_FP16_TO_FP32(b_ptr[l].d[0]), - 
GGML_FP16_TO_FP32(b_ptr[l].d[1]), - GGML_FP16_TO_FP32(b_ptr[l].d[2]), - GGML_FP16_TO_FP32(b_ptr[l].d[3]), - GGML_FP16_TO_FP32(b_ptr[l].d[4]), - GGML_FP16_TO_FP32(b_ptr[l].d[5]), - GGML_FP16_TO_FP32(b_ptr[l].d[6]), - GGML_FP16_TO_FP32(b_ptr[l].d[7]) + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[0]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[1]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[2]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[3]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[4]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[5]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[6]), + GGML_CPU_FP16_TO_FP32(b_ptr[l].d[7]) }; const vfloat32m1_t b_scales_vec = __riscv_vle32_v_f32m1(b_scales, vl / 4); @@ -382,7 +383,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } diff --git a/ggml/src/ggml-cpu/arch/s390/quants.c b/ggml/src/ggml-cpu/arch/s390/quants.c index 26bd908757114..a840219a4fc08 100644 --- a/ggml/src/ggml-cpu/arch/s390/quants.c +++ b/ggml/src/ggml-cpu/arch/s390/quants.c @@ -3,6 +3,7 @@ #include "ggml-quants.h" #include "ggml-impl.h" #include "ggml-cpu.h" +#include "simd-mappings.h" #include "../../quants.h" #include "../../ggml-cpu-impl.h" @@ -49,7 +50,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float d = amax / ((1 << 7) - 1); const float id = d ? 1.0f / d : 0.0f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); for (int j = 0; j < 8; j++) { const __vector float v = vec_mul(srcv[j], vec_splats(id)); @@ -94,7 +95,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float d = amax / ((1 << 7) - 1); const float id = d ? 
1.0f / d : 0.0f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); __vector int32_t acc = vec_splats(0); @@ -110,7 +111,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i acc = vec_add(acc, vi); } - y[i].s = GGML_FP32_TO_FP16(d * (acc[0] + acc[1] + acc[2] + acc[3])); + y[i].s = GGML_CPU_FP32_TO_FP16(d * (acc[0] + acc[1] + acc[2] + acc[3])); } #else GGML_UNUSED(nb); @@ -164,7 +165,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi __vector int16_t v_xy_ = v_xylso + v_xylse + v_xyhso + v_xyhse; v_xy_ += vec_reve(v_xy_); const __vector float v_xy = vec_float(vec_unpackh(v_xy_)); - const __vector float v_d = vec_splats(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); + const __vector float v_d = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); acc = vec_madd(v_xy, v_d, acc); } @@ -185,7 +186,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; @@ -219,7 +220,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi __builtin_prefetch(x[ib].qs, 0, 1); __builtin_prefetch(y[ib].qs, 0, 1); - summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s); + summs += GGML_CPU_FP16_TO_FP32(x[ib].m) * GGML_CPU_FP16_TO_FP32(y[ib].s); const uint8x16_t v_x = vec_xl(0, x[ib].qs); const int8x16_t v_xl = (const int8x16_t)(v_x & v_m); @@ -231,7 +232,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi const int32x4_t v_xy_ = ggml_vec_dot(ggml_vec_dot(vec_splats(0), v_xl, v_yl), v_xh, v_yh); const float32x4_t v_xy = vec_float(v_xy_); - const float32x4_t v_d = vec_splats(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); + const float32x4_t v_d = 
vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); acc = vec_madd(v_xy, v_d, acc); } @@ -252,7 +253,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -290,7 +291,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi const int32x4_t v_xy_ = ggml_vec_dot(ggml_vec_dot(vec_splats(0), v_xl, v_yl), v_xh, v_yh); const float32x4_t v_xy = vec_float(v_xy_); - const float32x4_t v_d = vec_splats(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); + const float32x4_t v_d = vec_splats(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); acc = vec_madd(v_xy, v_d, acc); } @@ -305,7 +306,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; @@ -348,7 +349,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sum = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * restrict x0l = x[i].qs; const uint8_t * restrict x0h = x[i].hmask; @@ -497,7 +498,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int 
l = 0; l < 8; ++l) sumf += sums[l]; @@ -537,8 +538,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const int16x8_t v_ysumsl = vec_xl(0 , y[i].bsums); const int16x8_t v_ysumsh = vec_xl(16, y[i].bsums); @@ -647,9 +648,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -698,8 +699,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const int16x8_t v_ysumsl = vec_xl(0 , y[i].bsums); const int16x8_t v_ysumsh = vec_xl(16, y[i].bsums); @@ -819,9 +820,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) 
* y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -859,7 +860,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi int8x16_t v_y[4]; for (int i = 0; i < nb; ++i) { - const float d_all = GGML_FP16_TO_FP32(x[i].d); + const float d_all = GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT x0l = x[i].ql; const uint8_t * GGML_RESTRICT x0h = x[i].qh; @@ -1004,7 +1005,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1071,7 +1072,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi // float sumf = 0; // for (int i = 0; i < nb; ++i) { -// const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; +// const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; // const uint16_t * GGML_RESTRICT q2 = x[i].qs; // const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1121,7 +1122,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi // float sumf = 0.f; // for (int i = 0; i < nb; ++i) { -// const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; +// const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; // const uint16_t * GGML_RESTRICT q2 = x[i].qs; // const int8_t * GGML_RESTRICT q8 = y[i].qs; // int32_t bsum = 0; @@ -1182,12 +1183,12 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v const int8x16_t v_yh = vec_xl(QK8_0/2, y0->qs); const int32x4_t v_xy = ggml_vec_dot(ggml_vec_dot(vec_splats(0), v_xl, v_yl), v_xh, v_yh); - sumf += GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d) * (v_xy[0] + v_xy[1] + v_xy[2] + v_xy[3]); + sumf += GGML_CPU_FP16_TO_FP32(x0->d) * GGML_CPU_FP16_TO_FP32(y0->d) * (v_xy[0] + 
v_xy[1] + v_xy[2] + v_xy[3]); } #endif for (; ib < nb; ++ib) { - const float d = GGML_FP16_TO_FP32(y[ib].d)*GGML_FP16_TO_FP32(x[ib].d); + const float d = GGML_CPU_FP16_TO_FP32(y[ib].d)*GGML_CPU_FP16_TO_FP32(x[ib].d); int sumi1 = 0, sumi2 = 0; for (int j = 0; j < QK4_NL/2; ++j) { sumi1 += y[ib].qs[j+ 0] * kvalues_iq4nl[x[ib].qs[j] & 0xf]; @@ -1257,7 +1258,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v sumi2 += (vsumi1[0] + vsumi1[1] + vsumi1[2] + vsumi1[3]) * ls2; } - sumf += GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d * (sumi1 + sumi2); + sumf += GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d * (sumi1 + sumi2); } *s = sumf; @@ -1265,7 +1266,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v #else float sumf = 0; for (int ibl = 0; ibl < nb; ++ibl) { - const float d4d8 = GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d; + const float d4d8 = GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d; uint16_t h = x[ibl].scales_h; const uint8_t * qs = x[ibl].qs; const int8_t * q8 = y[ibl].qs; diff --git a/ggml/src/ggml-cpu/arch/wasm/quants.c b/ggml/src/ggml-cpu/arch/wasm/quants.c index 4ec97f533f1e4..b0904d8a3ab5e 100644 --- a/ggml/src/ggml-cpu/arch/wasm/quants.c +++ b/ggml/src/ggml-cpu/arch/wasm/quants.c @@ -3,6 +3,7 @@ #include "ggml-quants.h" #include "ggml-impl.h" #include "ggml-cpu.h" +#include "simd-mappings.h" #include "../../quants.h" #include "../../ggml-cpu-impl.h" @@ -65,7 +66,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float d = amax / ((1 << 7) - 1); const float id = d ? 1.0f/d : 0.0f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); for (int j = 0; j < 8; j++) { const v128_t v = wasm_f32x4_mul(srcv[j], wasm_f32x4_splat(id)); @@ -110,7 +111,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i const float d = amax / ((1 << 7) - 1); const float id = d ? 
1.0f/d : 0.0f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); v128_t accv = wasm_i32x4_splat(0); @@ -126,7 +127,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i accv = wasm_i32x4_add(accv, vi); } - y[i].s = GGML_FP32_TO_FP16( + y[i].s = GGML_CPU_FP32_TO_FP16( d * (wasm_i32x4_extract_lane(accv, 0) + wasm_i32x4_extract_lane(accv, 1) + wasm_i32x4_extract_lane(accv, 2) + @@ -324,8 +325,8 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi ); // Accumulate results with scaling - float scale0 = GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d); - float scale1 = GGML_FP16_TO_FP32(x1->d) * GGML_FP16_TO_FP32(y1->d); + float scale0 = GGML_CPU_FP16_TO_FP32(x0->d) * GGML_CPU_FP16_TO_FP32(y0->d); + float scale1 = GGML_CPU_FP16_TO_FP32(x1->d) * GGML_CPU_FP16_TO_FP32(y1->d); sumv = wasm_f32x4_add(sumv, wasm_f32x4_mul(wasm_f32x4_convert_i32x4(dp0), wasm_f32x4_splat(scale0))); sumv = wasm_f32x4_add(sumv, wasm_f32x4_mul(wasm_f32x4_convert_i32x4(dp1), wasm_f32x4_splat(scale1))); @@ -348,7 +349,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; @@ -428,7 +429,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi wasm_i32x4_dot_i16x8(v0lfh, v1lh)), wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl), wasm_i32x4_dot_i16x8(v0hfh, v1hh)))), - wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d)))); + wasm_f32x4_splat(GGML_CPU_FP16_TO_FP32(x0->d) * GGML_CPU_FP16_TO_FP32(y0->d)))); } sumf = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) + @@ -454,7 +455,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += 
(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; @@ -491,7 +492,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi const block_q5_1 * GGML_RESTRICT x0 = &x[ib]; const block_q8_1 * GGML_RESTRICT y0 = &y[ib]; - summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s); + summs += GGML_CPU_FP16_TO_FP32(x0->m) * GGML_CPU_FP16_TO_FP32(y0->s); const v128_t m4b = wasm_i8x16_splat(0x0F); @@ -538,7 +539,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi wasm_i32x4_dot_i16x8(v0lfh, v1lh)), wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl), wasm_i32x4_dot_i16x8(v0hfh, v1hh)))), - wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d)))); + wasm_f32x4_splat(GGML_CPU_FP16_TO_FP32(x0->d) * GGML_CPU_FP16_TO_FP32(y0->d)))); } sumf = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) + @@ -564,7 +565,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -620,7 +621,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi const v128_t sum_dots = wasm_i32x4_add(wasm_i32x4_add(dx0_0, dx0_1), wasm_i32x4_add(dx1_0, dx1_1)); // Convert to float and accumulate - const float scale = GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d); + const float scale = GGML_CPU_FP16_TO_FP32(x0->d) * GGML_CPU_FP16_TO_FP32(y0->d); sumv = wasm_f32x4_add(sumv, wasm_f32x4_mul(wasm_f32x4_convert_i32x4(sum_dots), wasm_f32x4_splat(scale))); } @@ -635,7 +636,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, 
const voi sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; @@ -746,8 +747,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi isum += wasm_i32x4_extract_lane(isum_vec, 0); } - const float dall = GGML_FP16_TO_FP32(x[i].d) * y[i].d; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dall = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf += dall * isum - dmin * summs; } @@ -768,8 +769,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi summs += y[i].bsums[j] * (sc[j] >> 4); } - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int isum = 0; int is = 0; @@ -880,7 +881,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi } // Accumulate results - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const v128_t v_d = wasm_f32x4_splat(d); v128_t v_sum = wasm_f32x4_add( wasm_f32x4_mul(wasm_f32x4_convert_i32x4(v_acc0), v_d), @@ -957,7 +958,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -991,8 +992,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * 
GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); // Corrected sign + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); // Corrected sign const uint8_t * GGML_RESTRICT q4 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1136,9 +1137,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1170,8 +1171,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi float sumf = 0; for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); // Fixed sign + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); // Fixed sign const uint8_t * GGML_RESTRICT q5 = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -1331,9 +1332,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1420,7 +1421,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t 
bs, const voi wasm_v128_store(&aux32[0], acc0); wasm_v128_store(&aux32[4], acc1); - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) { sums[l] += d * aux32[l]; } @@ -1470,7 +1471,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; diff --git a/ggml/src/ggml-cpu/arch/x86/quants.c b/ggml/src/ggml-cpu/arch/x86/quants.c index e3f722b52c9b2..e7527c00a8f17 100644 --- a/ggml/src/ggml-cpu/arch/x86/quants.c +++ b/ggml/src/ggml-cpu/arch/x86/quants.c @@ -3,6 +3,7 @@ #include "ggml-quants.h" #include "ggml-impl.h" #include "ggml-cpu.h" +#include "simd-mappings.h" #include "../../quants.h" #include "../../ggml-cpu-impl.h" @@ -256,9 +257,9 @@ static inline __m256 mul_sum_i8_quad_float(const __m128i x_1_0, const __m128i x_ // quad fp16 delta calculation static inline __m256 quad_fp16_delta_float(const float x0, const float y0, const float x1, const float y1) { - // GGML_FP16_TO_FP32 is faster than Intel F16C - return _mm256_set_m128(_mm_set1_ps(GGML_FP16_TO_FP32(x1) * GGML_FP16_TO_FP32(y1)), - _mm_set1_ps(GGML_FP16_TO_FP32(x0) * GGML_FP16_TO_FP32(y0))); + // GGML_CPU_FP16_TO_FP32 is faster than Intel F16C + return _mm256_set_m128(_mm_set1_ps(GGML_CPU_FP16_TO_FP32(x1) * GGML_CPU_FP16_TO_FP32(y1)), + _mm_set1_ps(GGML_CPU_FP16_TO_FP32(x0) * GGML_CPU_FP16_TO_FP32(y0))); } #endif #elif defined(__SSSE3__) @@ -305,7 +306,7 @@ void quantize_row_q8_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i // Quantize these floats const float d = maxScalar / 127.f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); const float id = ( maxScalar != 0.0f ) ? 
127.f / maxScalar : 0.0f; const __m256 mul = _mm256_set1_ps( id ); @@ -401,7 +402,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i // Quantize these floats const float d = max_scalar / 127.f; - y[i].d = GGML_FP32_TO_FP16(d); + y[i].d = GGML_CPU_FP32_TO_FP16(d); const float id = ( max_scalar != 0.0f ) ? 127.f / max_scalar : 0.0f; const __m256 mul = _mm256_set1_ps( id ); @@ -425,7 +426,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i #if defined(__AVX2__) // Compute the sum of the quants and set y[i].s - y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3)))); + y[i].s = GGML_CPU_FP32_TO_FP16(d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3)))); // Convert int32 to int16 i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15 @@ -455,7 +456,7 @@ void quantize_row_q8_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, i // Compute the sum of the quants and set y[i].s const __m128i s0 = _mm_add_epi32(_mm_add_epi32(ni0, ni1), _mm_add_epi32(ni2, ni3)); const __m128i s1 = _mm_add_epi32(_mm_add_epi32(ni4, ni5), _mm_add_epi32(ni6, ni7)); - y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_4(_mm_add_epi32(s0, s1))); + y[i].s = GGML_CPU_FP32_TO_FP16(d * hsum_i32_4(_mm_add_epi32(s0, s1))); // Convert int32 to int16 ni0 = _mm_packs_epi32( ni0, ni1 ); @@ -552,7 +553,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { /* Compute combined scale for the block */ - const __m256 d = _mm256_set1_ps( GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d) ); + const __m256 d = _mm256_set1_ps( GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d) ); __m256i qx = bytes_from_nibbles_32(x[ib].qs); @@ -613,7 +614,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi _mm_prefetch(&y[ib] + 
sizeof(block_q8_0), _MM_HINT_T0); // Compute combined scale for the block 0 and 1 - const __m128 d_0_1 = _mm_set1_ps( GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d) ); + const __m128 d_0_1 = _mm_set1_ps( GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d) ); const __m128i tmp_0_1 = _mm_loadu_si128((const __m128i *)x[ib].qs); @@ -631,7 +632,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi _mm_prefetch(&y[ib] + 2 * sizeof(block_q8_0), _MM_HINT_T0); // Compute combined scale for the block 2 and 3 - const __m128 d_2_3 = _mm_set1_ps( GGML_FP16_TO_FP32(x[ib + 1].d) * GGML_FP16_TO_FP32(y[ib + 1].d) ); + const __m128 d_2_3 = _mm_set1_ps( GGML_CPU_FP16_TO_FP32(x[ib + 1].d) * GGML_CPU_FP16_TO_FP32(y[ib + 1].d) ); const __m128i tmp_2_3 = _mm_loadu_si128((const __m128i *)x[ib + 1].qs); @@ -680,7 +681,7 @@ void ggml_vec_dot_q4_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; @@ -711,10 +712,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { - const float d0 = GGML_FP16_TO_FP32(x[ib].d); - const float d1 = GGML_FP16_TO_FP32(y[ib].d); + const float d0 = GGML_CPU_FP16_TO_FP32(x[ib].d); + const float d1 = GGML_CPU_FP16_TO_FP32(y[ib].d); - summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s); + summs += GGML_CPU_FP16_TO_FP32(x[ib].m) * GGML_CPU_FP16_TO_FP32(y[ib].s); const __m256 d0v = _mm256_set1_ps( d0 ); const __m256 d1v = _mm256_set1_ps( d1 ); @@ -752,7 +753,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += 
(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -783,7 +784,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { /* Compute combined scale for the block */ - const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); + const __m256 d = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); __m256i qx = bytes_from_nibbles_32(x[ib].qs); __m256i bxhi = bytes_from_bits_32(x[ib].qh); @@ -807,7 +808,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { /* Compute combined scale for the block */ - const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); + const __m256 d = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); __m256i bx_0 = bytes_from_nibbles_32(x[ib].qs); const __m256i bxhi = bytes_from_bits_32(x[ib].qh); @@ -851,7 +852,7 @@ void ggml_vec_dot_q5_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; @@ -883,16 +884,16 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { - const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[ib].d)); + const __m256 dx = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(x[ib].d)); - summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s); + summs += GGML_CPU_FP16_TO_FP32(x[ib].m) * GGML_CPU_FP16_TO_FP32(y[ib].s); __m256i qx = bytes_from_nibbles_32(x[ib].qs); __m256i bxhi = bytes_from_bits_32(x[ib].qh); bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10)); qx = _mm256_or_si256(qx, bxhi); - const __m256 dy = 
_mm256_set1_ps(GGML_FP16_TO_FP32(y[ib].d)); + const __m256 dy = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(y[ib].d)); const __m256i qy = _mm256_loadu_si256((const __m256i *)y[ib].qs); const __m256 q = mul_sum_us8_pairs_float(qx, qy); @@ -910,9 +911,9 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { - const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[ib].d)); + const __m256 dx = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(x[ib].d)); - summs += GGML_FP16_TO_FP32(x[ib].m) * GGML_FP16_TO_FP32(y[ib].s); + summs += GGML_CPU_FP16_TO_FP32(x[ib].m) * GGML_CPU_FP16_TO_FP32(y[ib].s); __m256i bx_0 = bytes_from_nibbles_32(x[ib].qs); const __m256i bxhi = bytes_from_bits_32(x[ib].qh); @@ -926,7 +927,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi bxh = _mm_or_si128(bxh, bxhih); bx_0 = MM256_SET_M128I(bxh, bxl); - const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[ib].d)); + const __m256 dy = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(y[ib].d)); const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[ib].qs); const __m256 q = mul_sum_us8_pairs_float(bx_0, by_0); @@ -956,7 +957,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * GGML_RESTRICT s, size_t bs, const voi } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; @@ -986,7 +987,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi // Main loop for (; ib < nb; ++ib) { // Compute combined scale for the block - const __m256 d = _mm256_set1_ps(GGML_FP16_TO_FP32(x[ib].d) * GGML_FP16_TO_FP32(y[ib].d)); + const __m256 d = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(x[ib].d) * GGML_CPU_FP16_TO_FP32(y[ib].d)); __m256i qx = _mm256_loadu_si256((const __m256i 
*)x[ib].qs); __m256i qy = _mm256_loadu_si256((const __m256i *)y[ib].qs); @@ -1025,7 +1026,7 @@ void ggml_vec_dot_q8_0_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const voi sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; @@ -1144,7 +1145,7 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo } const __m256i ysum = _mm256_loadu_si256((const __m256i *) y[i].bsums); - const __m256 d = _mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d)); + const __m256 d = _mm256_set1_ps(y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d)); sumi0 = _mm256_sub_epi16(sumi0, ysum); sumi0 = _mm256_add_epi16(sumi0, _mm256_add_epi16(sumi1, sumi2)); @@ -1190,7 +1191,7 @@ void ggml_vec_dot_tq1_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo } } - sumf += (float) sum * (GGML_FP16_TO_FP32(x[i].d) * y[i].d); + sumf += (float) sum * (GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d); } *s = sumf; @@ -1244,7 +1245,7 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo } const __m256i ysum = _mm256_loadu_si256((const __m256i *) y[i].bsums); - const __m256 d = _mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d)); + const __m256 d = _mm256_set1_ps(y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d)); sumi0 = _mm256_add_epi16(sumi0, sumi1); sumi0 = _mm256_sub_epi16(sumi0, ysum); @@ -1269,7 +1270,7 @@ void ggml_vec_dot_tq2_0_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo } } - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); sumf += (float) sumi * d; } @@ -1299,8 +1300,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * 
GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const uint8_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1366,8 +1367,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const uint8_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1477,8 +1478,8 @@ void ggml_vec_dot_q2_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi summs += y[i].bsums[j] * (sc[j] >> 4); } - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int isum = 0; int is = 0; @@ -1533,7 +1534,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q3 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1638,7 +1639,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q3 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -1824,7 +1825,7 @@ void ggml_vec_dot_q3_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = 
GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -1862,8 +1863,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); memcpy(utmp, x[i].scales, 12); utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); @@ -1928,8 +1929,8 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const uint8_t * GGML_RESTRICT q4 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -2049,9 +2050,9 @@ void ggml_vec_dot_q4_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -2092,8 +2093,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi const uint8_t * GGML_RESTRICT q5 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = 
-y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); memcpy(utmp, x[i].scales, 12); utmp[3] = ((utmp[2] >> 4) & kmask2) | (((utmp[1] >> 6) & kmask3) << 4); @@ -2170,8 +2171,8 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = -y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = -y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); const uint8_t * GGML_RESTRICT q5 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -2311,9 +2312,9 @@ void ggml_vec_dot_q5_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -2344,7 +2345,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q4 = x[i].ql; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -2422,7 +2423,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int i = 0; i < nb; ++i) { - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); const uint8_t * GGML_RESTRICT q4 = x[i].ql; const uint8_t * GGML_RESTRICT qh = x[i].qh; @@ -2555,7 +2556,7 @@ void ggml_vec_dot_q6_K_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const voi for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const 
float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; @@ -2622,7 +2623,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; __m256i sumi1 = _mm256_setzero_si256(); @@ -2663,7 +2664,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; __m128i sumi1_0 = _mm_setzero_si128(); @@ -2717,7 +2718,7 @@ void ggml_vec_dot_iq2_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; int32_t bsum = 0; @@ -2792,7 +2793,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -2913,7 +2914,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const 
float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3035,7 +3036,7 @@ void ggml_vec_dot_iq2_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const uint8_t * GGML_RESTRICT sc = x[i].scales; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3104,7 +3105,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8); @@ -3177,7 +3178,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint16_t * GGML_RESTRICT signs = (const uint16_t *)(x[i].qs + QK_K/8); @@ -3253,7 +3254,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo float sumf = 0; for (int i = 0; i < nb; i++) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const int8_t * q8 = y[i].qs; const uint8_t * qs = x[i].qs; const uint8_t * qh = x[i].qh; @@ -3313,7 +3314,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) 
* y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3358,7 +3359,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3414,7 +3415,7 @@ void ggml_vec_dot_iq3_xxs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -3480,7 +3481,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs; @@ -3565,7 +3566,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo __m256 accumf = _mm256_setzero_ps(); for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint16_t * GGML_RESTRICT signs = (const uint16_t *)x[i].signs; @@ -3648,7 +3649,7 @@ void ggml_vec_dot_iq3_s_q8_K(int n, float * GGML_RESTRICT s, 
size_t bs, const vo float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint8_t * GGML_RESTRICT signs = x[i].signs; @@ -3753,7 +3754,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo + (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * (qh[ib+1] & 0x8000 ? -1 : 1) * ls2; } - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); accum = _mm256_fmadd_ps(_mm256_set1_ps(d), _mm256_cvtepi32_ps(sumi), accum); accum1 += d * sumi1; @@ -3801,7 +3802,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo + (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * (qh[ib+1] & 0x8000 ? -1 : 1) * ls2; } - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); accum = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(d), _mm256_cvtepi32_ps(MM256_SET_M128I(sumi1_1, sumi1_0))), accum); accum1 += d * sumi1; @@ -3835,7 +3836,7 @@ void ggml_vec_dot_iq1_s_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qs += 4; } - sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); + sumf += GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); } *s = sumf; @@ -3947,7 +3948,7 @@ void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qs += 8; qh += 4; } - const __m256 d = _mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(scale.f16)); + const __m256 d = _mm256_set1_ps(y[i].d * GGML_CPU_FP16_TO_FP32(scale.f16)); accum1 = _mm256_fmadd_ps(d, _mm256_cvtepi32_ps(sumi1), accum1); accum2 = _mm256_fmadd_ps(d, _mm256_cvtepi32_ps(sumi2), accum2); @@ -4033,7 +4034,7 @@ void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qs += 8; qh += 4; } - const __m256 d = 
_mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(scale.f16)); + const __m256 d = _mm256_set1_ps(y[i].d * GGML_CPU_FP16_TO_FP32(scale.f16)); accum1 = _mm256_add_ps(_mm256_mul_ps(d, _mm256_cvtepi32_ps(MM256_SET_M128I(sumi1_1, sumi1_0))), accum1); accum2 = _mm256_add_ps(_mm256_mul_ps(d, _mm256_cvtepi32_ps(MM256_SET_M128I(sumi2_1, sumi2_0))), accum2); @@ -4083,7 +4084,7 @@ void ggml_vec_dot_iq1_m_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo qh += 2; } - sumf += GGML_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2); + sumf += GGML_CPU_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2); } *s = sumf; @@ -4129,9 +4130,9 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v const __m256i p16_2 = mul_add_epi8(q4b_2, q8b_2); const __m256i p_1 = _mm256_madd_epi16(p16_1, mone); const __m256i p_2 = _mm256_madd_epi16(p16_2, mone); - accum1 = _mm256_fmadd_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(y[ib + 0].d)*GGML_FP16_TO_FP32(x[ib + 0].d)), + accum1 = _mm256_fmadd_ps(_mm256_set1_ps(GGML_CPU_FP16_TO_FP32(y[ib + 0].d)*GGML_CPU_FP16_TO_FP32(x[ib + 0].d)), _mm256_cvtepi32_ps(p_1), accum1); - accum2 = _mm256_fmadd_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(y[ib + 1].d)*GGML_FP16_TO_FP32(x[ib + 1].d)), + accum2 = _mm256_fmadd_ps(_mm256_set1_ps(GGML_CPU_FP16_TO_FP32(y[ib + 1].d)*GGML_CPU_FP16_TO_FP32(x[ib + 1].d)), _mm256_cvtepi32_ps(p_2), accum2); } @@ -4164,7 +4165,7 @@ void ggml_vec_dot_iq4_nl_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const v #endif for (; ib < nb; ++ib) { - const float d = GGML_FP16_TO_FP32(y[ib].d)*GGML_FP16_TO_FP32(x[ib].d); + const float d = GGML_CPU_FP16_TO_FP32(y[ib].d)*GGML_CPU_FP16_TO_FP32(x[ib].d); int sumi1 = 0, sumi2 = 0; for (int j = 0; j < QK4_NL/2; ++j) { sumi1 += y[ib].qs[j+ 0] * kvalues_iq4nl[x[ib].qs[j] & 0xf]; @@ -4219,7 +4220,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v sumi1 = _mm256_add_epi32(p_1, sumi1); sumi2 = _mm256_add_epi32(p_2, sumi2); } - 
accum = _mm256_fmadd_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(x[ibl].d)*y[ibl].d), + accum = _mm256_fmadd_ps(_mm256_set1_ps(GGML_CPU_FP16_TO_FP32(x[ibl].d)*y[ibl].d), _mm256_cvtepi32_ps(_mm256_add_epi32(sumi1, sumi2)), accum); } @@ -4267,7 +4268,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v } __m128i sumi12_0 = _mm_add_epi32(sumi1_0, sumi2_0); __m128i sumi12_1 = _mm_add_epi32(sumi1_1, sumi2_1); - accum = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(GGML_FP16_TO_FP32(x[ibl].d)*y[ibl].d), + accum = _mm256_add_ps(_mm256_mul_ps(_mm256_set1_ps(GGML_CPU_FP16_TO_FP32(x[ibl].d)*y[ibl].d), _mm256_cvtepi32_ps(MM256_SET_M128I(sumi12_1, sumi12_0))), accum); } @@ -4276,7 +4277,7 @@ void ggml_vec_dot_iq4_xs_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const v #else float sumf = 0; for (int ibl = 0; ibl < nb; ++ibl) { - const float d4d8 = GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d; + const float d4d8 = GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d; uint16_t h = x[ibl].scales_h; const uint8_t * qs = x[ibl].qs; const int8_t * q8 = y[ibl].qs; diff --git a/ggml/src/ggml-cpu/arch/x86/repack.cpp b/ggml/src/ggml-cpu/arch/x86/repack.cpp index e7635a294a796..c00c1e541cb44 100644 --- a/ggml/src/ggml-cpu/arch/x86/repack.cpp +++ b/ggml/src/ggml-cpu/arch/x86/repack.cpp @@ -6,6 +6,7 @@ #include "ggml-impl.h" #include "ggml-cpu.h" #include "ggml-cpu-impl.h" +#include "simd-mappings.h" #include "traits.h" #include @@ -39,11 +40,11 @@ static inline __m512 __avx512_f32cx8x2_load(ggml_fp16_t *x, ggml_fp16_t *y) { float tmp[16]; for (int i = 0; i < 8; i++) { - tmp[i] = GGML_FP16_TO_FP32(x[i]); + tmp[i] = GGML_CPU_FP16_TO_FP32(x[i]); } for (int i = 0; i < 8; i++) { - tmp[i + 8] = GGML_FP16_TO_FP32(y[i]); + tmp[i + 8] = GGML_CPU_FP16_TO_FP32(y[i]); } return _mm512_loadu_ps(tmp); @@ -54,10 +55,10 @@ static inline __m512 __avx512_repeat_f32cx16_load(__m128i x) { _mm_storeu_si128((__m128i*)tmphalf, x); for (int i = 0; i < 4; i++) { - tmp[i] = GGML_FP16_TO_FP32(tmphalf[i]); - 
tmp[i + 4] = GGML_FP16_TO_FP32(tmphalf[i]); - tmp[i + 8] = GGML_FP16_TO_FP32(tmphalf[i]); - tmp[i + 12] = GGML_FP16_TO_FP32(tmphalf[i]); + tmp[i] = GGML_CPU_FP16_TO_FP32(tmphalf[i]); + tmp[i + 4] = GGML_CPU_FP16_TO_FP32(tmphalf[i]); + tmp[i + 8] = GGML_CPU_FP16_TO_FP32(tmphalf[i]); + tmp[i + 12] = GGML_CPU_FP16_TO_FP32(tmphalf[i]); } return _mm512_loadu_ps(tmp); @@ -67,7 +68,7 @@ static inline __m256 __avx_f32cx8_load(ggml_fp16_t *x) { float tmp[8]; for (int i = 0; i < 8; i++) { - tmp[i] = GGML_FP16_TO_FP32(x[i]); + tmp[i] = GGML_CPU_FP16_TO_FP32(x[i]); } return _mm256_loadu_ps(tmp); @@ -76,8 +77,8 @@ static inline __m256 __avx_repeat_f32cx8_load(ggml_fp16_t *x) { float tmp[8]; for (int i = 0; i < 4; i++) { - tmp[i] = GGML_FP16_TO_FP32(x[i]); - tmp[i + 4] = GGML_FP16_TO_FP32(x[i]); + tmp[i] = GGML_CPU_FP16_TO_FP32(x[i]); + tmp[i + 4] = GGML_CPU_FP16_TO_FP32(x[i]); } return _mm256_loadu_ps(tmp); @@ -88,7 +89,7 @@ static inline __m256 __avx_rearranged_f32cx8_load(ggml_fp16_t *x, __m128i arrang _mm_storeu_si128((__m128i*)tmphalf, _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *) x), arrangeMask)); for (int i = 0; i < 8; i++) { - tmp[i] = GGML_FP16_TO_FP32(tmphalf[i]); + tmp[i] = GGML_CPU_FP16_TO_FP32(tmphalf[i]); } return _mm256_loadu_ps(tmp); @@ -211,7 +212,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR id[row_iter] = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f; //d ? 1.0f / d : 0.0f; // Store the scale for the individual block - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); // Store the values in blocks of eight values - Aim is to use these later for block interleaving srcv[row_iter][0] = v0; @@ -297,7 +298,7 @@ void ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTR const float d = amax / ((1 << 7) - 1); id[row_iter] = d ? 
1.0f / d : 0.0f; - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); } for (int j = 0; j < QK8_0 * 4; j++) { @@ -647,7 +648,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo const __m256 col_scale_f32 = GGML_F32Cx8_REARRANGE_LOAD(b_ptr[b].d, changemask); // Load and convert to FP32 scale from block_q8_0 - const __m256 row_scale_f32 = _mm256_set1_ps(GGML_FP16_TO_FP32(a_ptr[b].d)); + const __m256 row_scale_f32 = _mm256_set1_ps(GGML_CPU_FP16_TO_FP32(a_ptr[b].d)); // Load the block values in block_q8_0 in batches of 16 bytes and replicate the same across 256 bit vector __m256i lhs_vec_0 = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)a_ptr[b].qs)); @@ -706,7 +707,7 @@ void ggml_gemv_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -972,13 +973,13 @@ void ggml_gemv_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo sumi2 = sumi2 * scales_1[j]; sumi += sumi1 + sumi2; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d; + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d; } } for (int sb = 0; sb < 8; sb++) { uint8_t *mins = (uint8_t*) utmp + 8 + sb * 16; for (int j = 0; j < ncols_interleaved; j++) { - sum_minf[j] += mins[j] * (a_ptr[l].bsums[sb * 2] + a_ptr[l].bsums[sb * 2 + 1]) * GGML_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d; + sum_minf[j] += mins[j] * (a_ptr[l].bsums[sb * 2] + a_ptr[l].bsums[sb * 2 + 1]) * GGML_CPU_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d; } } } @@ -1755,7 +1756,7 @@ void ggml_gemm_q4_0_8x8_q8_0(int n, float * 
GGML_RESTRICT s, size_t bs, const vo sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -3259,7 +3260,7 @@ void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo sumi2 = sumi2 * scales_1[j]; sumi += sumi1 + sumi2; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d[m]; + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d[m]; } } } @@ -3268,7 +3269,7 @@ void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const vo for(int m = 0; m < 4; m++) { const int16_t *bsums = a_ptr[l].bsums + (sb * 8) + (m * 4) - ((sb % 2) * 6); for(int j = 0; j < ncols_interleaved; j++) { - sum_minf[m][j] += mins[j] * (bsums[0] + bsums[1]) * GGML_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d[m]; + sum_minf[m][j] += mins[j] * (bsums[0] + bsums[1]) * GGML_CPU_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d[m]; } } } diff --git a/ggml/src/ggml-cpu/common.h b/ggml/src/ggml-cpu/common.h index 5624176cce94b..353563dc35c5d 100644 --- a/ggml/src/ggml-cpu/common.h +++ b/ggml/src/ggml-cpu/common.h @@ -4,6 +4,7 @@ #include "traits.h" #include "ggml-cpu-impl.h" #include "ggml-impl.h" +#include "simd-mappings.h" #ifdef __cplusplus @@ -12,11 +13,11 @@ // convenience functions/macros for use in template calls // note: these won't be required after the 'traits' lookup table is used. 
static inline ggml_fp16_t f32_to_f16(float x) { - return GGML_FP32_TO_FP16(x); + return GGML_CPU_FP32_TO_FP16(x); } static inline float f16_to_f32(ggml_fp16_t x) { - return GGML_FP16_TO_FP32(x); + return GGML_CPU_FP16_TO_FP32(x); } static inline ggml_bf16_t f32_to_bf16(float x) { diff --git a/ggml/src/ggml-cpu/ggml-cpu-impl.h b/ggml/src/ggml-cpu/ggml-cpu-impl.h index 69415daa82025..d839cf5c55e81 100644 --- a/ggml/src/ggml-cpu/ggml-cpu-impl.h +++ b/ggml/src/ggml-cpu/ggml-cpu-impl.h @@ -62,11 +62,17 @@ struct ggml_compute_params { #if defined(__s390x__) && defined(__VEC__) #ifndef __VXE__ #define __VXE__ -#endif +#endif // __VXE__ #ifndef __VXE2__ #define __VXE2__ -#endif -#endif +#endif // __VXE2__ +#endif // __s390x__ && __VEC__ + +#if defined(__s390x__) && defined(GGML_NNPA) +#ifndef __NNPA__ +#define __NNPA__ +#endif // __NNPA__ +#endif // __s390x__ && GGML_NNPA #if defined(__ARM_FEATURE_SVE) #include @@ -371,7 +377,7 @@ inline static int32x4_t ggml_vdotq_s32(int32x4_t acc, int8x16_t a, int8x16_t b) #define vec_xor(a, b) ((a) ^ (b)) // Vector XOR #endif -typedef signed char char8x16_t __attribute__((vector_size(16))); +typedef signed char char8x16_t __attribute__((vector_size(16))); typedef unsigned char uchar8x16_t __attribute__((vector_size(16))); typedef int8_t int8x16_t __attribute__((vector_size(16))); @@ -382,10 +388,10 @@ typedef uint8_t uint8x16_t __attribute__((vector_size(16))); typedef uint16_t uint16x8_t __attribute__((vector_size(16))); typedef uint32_t uint32x4_t __attribute__((vector_size(16))); -typedef float float32x4_t __attribute__((vector_size(16))); -typedef double double64x2_t __attribute((vector_size(16))); +typedef float float32x4_t __attribute__((vector_size(16))); +typedef double double64x2_t __attribute__((vector_size(16))); -typedef signed long long long64x2_t __attribute((vector_size(16))); +typedef signed long long long64x2_t __attribute__((vector_size(16))); typedef unsigned long long ulong64x2_t __attribute__((vector_size(16))); 
typedef struct ggml_uint8x16x2_t { @@ -503,31 +509,9 @@ static __m256 __lasx_xvreplfr2vr_s(const float val) { // TODO: move to ggml-threading void ggml_barrier(struct ggml_threadpool * tp); +void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value); +int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value); + #ifdef __cplusplus } #endif - -#define GGML_DO_PRAGMA_(x) _Pragma (#x) -#define GGML_DO_PRAGMA(x) GGML_DO_PRAGMA_(x) -#if defined(GGML_CPU_GENERIC) || defined(__HIPCC__) -// Note for Apple targets: -// - clang: aliases are not supported on darwin -// - all native kernels need to be implemented in both x86 and arm files -// - on iOS, tvOS, and visionOS, if cmake cannot determine the target architecture, all `_generic` names are replaced by defines -# define GGML_WEAK_ALIAS(name, alias) -#elif defined(__GNUC__) -// GCC/Clang on *nix -# define GGML_WEAK_ALIAS(name, alias) GGML_DO_PRAGMA(weak name = alias) // NOLINT -#elif defined(_MSC_VER) && defined(_WIN64) -// MSVC -// Note: C name mangling varies across different calling conventions -// see https://learn.microsoft.com/en-us/cpp/build/reference/decorated-names?view=msvc-170 -# define GGML_WEAK_ALIAS(name, alias) GGML_DO_PRAGMA(comment(linker, "/alternatename:" #name "=" #alias)) -#elif defined(_MSC_VER) && defined(WIN32) -// ref: https://github.com/ggml-org/whisper.cpp/pull/3239#issuecomment-2958224591 -# define GGML_WEAK_ALIAS(name, alias) GGML_DO_PRAGMA(comment(linker, "/alternatename:_" #name "=_" #alias)) -#else -# error "Unsupported compiler for GGML_WEAK_ALIAS" -#endif - -#define GGML_CPU_NATIVE_IMPL(name) GGML_WEAK_ALIAS(name, name ## _generic) diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c index ff28bf98bc7df..c5271b7757228 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.c +++ b/ggml/src/ggml-cpu/ggml-cpu.c @@ -72,15 +72,13 @@ #define UNUSED GGML_UNUSED #define SWAP(x, y, T) do { T SWAP = x; (x) = y; (y) = SWAP; } while (0) +// precomputed f32 table for f16 
(256 KB) (simd-mappings.h) +float ggml_table_f32_f16[1 << 16]; + #if defined(__ARM_ARCH) struct ggml_arm_arch_features_type { - int has_neon; - int has_dotprod; - int has_i8mm; - int has_sve; int sve_cnt; - int has_sme; -} ggml_arm_arch_features = {-1, -1, -1, -1, 0, -1}; +} ggml_arm_arch_features = { 0 }; #endif @@ -197,6 +195,7 @@ typedef pthread_t ggml_thread_t; static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = { [GGML_TYPE_F32] = { + .from_float = (ggml_from_float_t) ggml_cpu_fp32_to_fp32, .vec_dot = (ggml_vec_dot_t) ggml_vec_dot_f32, .vec_dot_type = GGML_TYPE_F32, .nrows = 1, @@ -559,6 +558,14 @@ void ggml_barrier(struct ggml_threadpool * tp) { #endif } +void ggml_threadpool_chunk_set(struct ggml_threadpool * tp, int value) { + atomic_store_explicit(&tp->current_chunk, value, memory_order_relaxed); +} + +int ggml_threadpool_chunk_add(struct ggml_threadpool * tp, int value) { + return atomic_fetch_add_explicit(&tp->current_chunk, value, memory_order_relaxed); +} + #if defined(__gnu_linux__) static cpu_set_t ggml_get_numa_affinity(void) { cpu_set_t cpuset; @@ -670,87 +677,15 @@ bool ggml_is_numa(void) { #if defined(__linux__) && defined(__aarch64__) #include -#elif defined(__APPLE__) -#include -#endif - -#if !defined(HWCAP2_I8MM) -#define HWCAP2_I8MM (1 << 13) -#endif - -#if !defined(HWCAP2_SME) -#define HWCAP2_SME (1 << 23) #endif static void ggml_init_arm_arch_features(void) { -#if defined(__linux__) && defined(__aarch64__) - uint32_t hwcap = getauxval(AT_HWCAP); - uint32_t hwcap2 = getauxval(AT_HWCAP2); - - ggml_arm_arch_features.has_neon = !!(hwcap & HWCAP_ASIMD); - ggml_arm_arch_features.has_dotprod = !!(hwcap & HWCAP_ASIMDDP); - ggml_arm_arch_features.has_i8mm = !!(hwcap2 & HWCAP2_I8MM); - ggml_arm_arch_features.has_sve = !!(hwcap & HWCAP_SVE); - ggml_arm_arch_features.has_sme = !!(hwcap2 & HWCAP2_SME); - -#if defined(__ARM_FEATURE_SVE) +#if defined(__linux__) && defined(__aarch64__) && defined(__ARM_FEATURE_SVE) 
ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL); #endif -#elif defined(__APPLE__) - int oldp = 0; - size_t size = sizeof(oldp); - if (sysctlbyname("hw.optional.AdvSIMD", &oldp, &size, NULL, 0) != 0) { - oldp = 0; - } - ggml_arm_arch_features.has_neon = oldp; - - if (sysctlbyname("hw.optional.arm.FEAT_DotProd", &oldp, &size, NULL, 0) != 0) { - oldp = 0; - } - ggml_arm_arch_features.has_dotprod = oldp; - - if (sysctlbyname("hw.optional.arm.FEAT_I8MM", &oldp, &size, NULL, 0) != 0) { - oldp = 0; - } - ggml_arm_arch_features.has_i8mm = oldp; - - if (sysctlbyname("hw.optional.arm.FEAT_SME", &oldp, &size, NULL, 0) != 0) { - oldp = 0; - } - ggml_arm_arch_features.has_sme = oldp; - - ggml_arm_arch_features.has_sve = 0; - ggml_arm_arch_features.sve_cnt = 0; -#else -// Run-time CPU feature detection not implemented for this platform, fallback to compile time -#if defined(__ARM_NEON) - ggml_arm_arch_features.has_neon = 1; -#else - ggml_arm_arch_features.has_neon = 0; -#endif - -#if defined(__ARM_FEATURE_MATMUL_INT8) - ggml_arm_arch_features.has_i8mm = 1; -#else - ggml_arm_arch_features.has_i8mm = 0; -#endif - -#if defined(__ARM_FEATURE_SVE) - ggml_arm_arch_features.has_sve = 1; - ggml_arm_arch_features.sve_cnt = 16; -#else - ggml_arm_arch_features.has_sve = 0; - ggml_arm_arch_features.sve_cnt = 0; -#endif - -#if defined(__ARM_FEATURE_SME) || defined(__ARM_FEATURE_SME2) - ggml_arm_arch_features.has_sme = 1; -#else - ggml_arm_arch_features.has_sme = 0; -#endif -#endif } -#endif + +#endif // __ARM_ARCH struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) { GGML_ASSERT(!ggml_get_no_alloc(ctx)); @@ -805,7 +740,7 @@ struct ggml_tensor * ggml_set_i32 (struct ggml_tensor * tensor, int32_t value) { { assert(tensor->nb[0] == sizeof(ggml_fp16_t)); for (int i = 0; i < n; i++) { - ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_FP32_TO_FP16(value)); + ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value)); 
} } break; case GGML_TYPE_BF16: @@ -864,7 +799,7 @@ struct ggml_tensor * ggml_set_f32(struct ggml_tensor * tensor, float value) { { assert(tensor->nb[0] == sizeof(ggml_fp16_t)); for (int i = 0; i < n; i++) { - ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_FP32_TO_FP16(value)); + ggml_vec_set_f16(nc, (ggml_fp16_t *)(data + i*n1), GGML_CPU_FP32_TO_FP16(value)); } } break; case GGML_TYPE_BF16: @@ -915,7 +850,7 @@ int32_t ggml_get_i32_1d(const struct ggml_tensor * tensor, int i) { case GGML_TYPE_F16: { GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); - return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); + return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); } case GGML_TYPE_BF16: { @@ -960,7 +895,7 @@ void ggml_set_i32_1d(const struct ggml_tensor * tensor, int i, int32_t value) { case GGML_TYPE_F16: { GGML_ASSERT(tensor->nb[0] == sizeof(ggml_fp16_t)); - ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value); + ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value); } break; case GGML_TYPE_BF16: { @@ -989,7 +924,7 @@ int32_t ggml_get_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i case GGML_TYPE_I32: return ((int32_t *) data)[0]; case GGML_TYPE_F16: - return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]); + return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]); case GGML_TYPE_BF16: return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]); case GGML_TYPE_F32: @@ -1016,7 +951,7 @@ void ggml_set_i32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, } break; case GGML_TYPE_F16: { - ((ggml_fp16_t *)(data))[0] = GGML_FP32_TO_FP16(value); + ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value); } break; case GGML_TYPE_BF16: { @@ -1054,7 +989,7 @@ float ggml_get_f32_1d(const struct ggml_tensor * tensor, int i) { } case GGML_TYPE_F16: { - return GGML_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); + return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *)(tensor->data))[i]); } case GGML_TYPE_BF16: { @@ 
-1093,7 +1028,7 @@ void ggml_set_f32_1d(const struct ggml_tensor * tensor, int i, float value) { } break; case GGML_TYPE_F16: { - ((ggml_fp16_t *)(tensor->data))[i] = GGML_FP32_TO_FP16(value); + ((ggml_fp16_t *)(tensor->data))[i] = GGML_CPU_FP32_TO_FP16(value); } break; case GGML_TYPE_BF16: { @@ -1120,7 +1055,7 @@ float ggml_get_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, case GGML_TYPE_I32: return ((int32_t *) data)[0]; case GGML_TYPE_F16: - return GGML_FP16_TO_FP32(((ggml_fp16_t *) data)[0]); + return GGML_CPU_FP16_TO_FP32(((ggml_fp16_t *) data)[0]); case GGML_TYPE_BF16: return GGML_BF16_TO_FP32(((ggml_bf16_t *) data)[0]); case GGML_TYPE_F32: @@ -1147,7 +1082,7 @@ void ggml_set_f32_nd(const struct ggml_tensor * tensor, int i0, int i1, int i2, } break; case GGML_TYPE_F16: { - ((ggml_fp16_t *)(data))[0] = GGML_FP32_TO_FP16(value); + ((ggml_fp16_t *)(data))[0] = GGML_CPU_FP32_TO_FP16(value); } break; case GGML_TYPE_BF16: { @@ -1258,7 +1193,7 @@ static void ggml_compute_forward_mul_mat_one_chunk( } } -static void ggml_compute_forward_mul_mat( +void ggml_compute_forward_mul_mat( const struct ggml_compute_params * params, struct ggml_tensor * dst) { @@ -1883,6 +1818,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_get_rows_back(params, tensor); } break; + case GGML_OP_SET_ROWS: + { + ggml_compute_forward_set_rows(params, tensor); + } break; case GGML_OP_DIAG: { ggml_compute_forward_diag(params, tensor); @@ -1927,6 +1866,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_im2col_back_f32(params, tensor); } break; + case GGML_OP_CONV_2D: + { + ggml_compute_forward_conv_2d(params, tensor); + } break; case GGML_OP_CONV_2D_DW: { ggml_compute_forward_conv_2d_dw(params, tensor); @@ -1959,6 +1902,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_pad_reflect_1d(params, tensor); } 
break; + case GGML_OP_ROLL: + { + ggml_compute_forward_roll(params, tensor); + } break; case GGML_OP_ARANGE: { ggml_compute_forward_arange(params, tensor); @@ -2006,6 +1953,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm { ggml_compute_forward_unary(params, tensor); } break; + case GGML_OP_GLU: + { + ggml_compute_forward_glu(params, tensor); + } break; case GGML_OP_GET_REL_POS: { ggml_compute_forward_get_rel_pos(params, tensor); @@ -2216,6 +2167,20 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { GGML_ABORT("fatal error"); } break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(node)) { + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + { + n_tasks = n_threads; + } break; + default: + GGML_ABORT("fatal error"); + } + break; case GGML_OP_SILU_BACK: case GGML_OP_MUL: case GGML_OP_DIV: @@ -2232,6 +2197,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { n_tasks = n_threads; } break; case GGML_OP_GET_ROWS: + case GGML_OP_SET_ROWS: { // FIXME: get_rows can use additional threads, but the cost of launching additional threads // decreases performance with GPU offloading @@ -2268,6 +2234,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { } break; case GGML_OP_IM2COL: case GGML_OP_IM2COL_BACK: + case GGML_OP_CONV_2D: case GGML_OP_CONV_2D_DW: case GGML_OP_CONV_TRANSPOSE_1D: case GGML_OP_CONV_TRANSPOSE_2D: @@ -2283,6 +2250,7 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) { case GGML_OP_UPSCALE: case GGML_OP_PAD: case GGML_OP_PAD_REFLECT_1D: + case GGML_OP_ROLL: case GGML_OP_ARANGE: case GGML_OP_TIMESTEP_EMBEDDING: case GGML_OP_ARGSORT: @@ -2785,6 +2753,10 @@ struct ggml_cplan ggml_graph_plan( GGML_ABORT("fatal error"); } } break; + case GGML_OP_CONV_2D: + { + cur = GGML_IM2COL_WORK_SIZE; + } break; case GGML_OP_CONV_TRANSPOSE_2D: { const int64_t 
ne00 = node->src[0]->ne[0]; // W @@ -3185,6 +3157,10 @@ enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct g return ggml_graph_compute(cgraph, &cplan); } +void ggml_cpu_fp32_to_fp32(const float * x, float * y, int64_t n) { + memcpy(y, x, n * sizeof(float)); +} + void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) { int64_t i = 0; #if defined(__F16C__) @@ -3205,9 +3181,24 @@ void ggml_cpu_fp32_to_fp16(const float * x, ggml_fp16_t * y, int64_t n) { __m128i y_vec = _mm_cvtps_ph(x_vec, _MM_FROUND_TO_NEAREST_INT); _mm_storel_epi64((__m128i *)(y + i), y_vec); } +#elif defined(__NNPA__) + for (; i + 7 < n; i += 8) { + float32x4_t v_xh = vec_xl(0, (const float *)(x + i + 0)); + float32x4_t v_xl = vec_xl(0, (const float *)(x + i + 4)); + uint16x8_t v_yd = vec_round_from_fp32(v_xh, v_xl, 0); + uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0); + vec_xst(v_y, 0, (ggml_fp16_t *)(y + i)); + } + for (; i + 3 < n; i += 4) { + float32x4_t v_x = vec_xl(0, (const float *)(x + i)); + float32x4_t v_zero = vec_splats(0.0f); + uint16x8_t v_yd = vec_round_from_fp32(v_x, v_zero, 0); + uint16x8_t v_y = vec_convert_to_fp16(v_yd, 0); + vec_xst(v_y, 0, (ggml_fp16_t *)(y + i)); + } #endif for (; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16(x[i]); } } @@ -3231,9 +3222,25 @@ void ggml_cpu_fp16_to_fp32(const ggml_fp16_t * x, float * y, int64_t n) { __m128 y_vec = _mm_cvtph_ps(x_vec); _mm_storeu_ps(y + i, y_vec); } +#elif defined(__NNPA__) + for (; i + 7 < n; i += 8) { + uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i)); + uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0); + float32x4_t v_yh = vec_extend_to_fp32_hi(v_yd, 0); + float32x4_t v_yl = vec_extend_to_fp32_lo(v_yd, 0); + vec_xst(v_yh, 0, (float *)(y + i + 0)); + vec_xst(v_yl, 0, (float *)(y + i + 4)); + } + for (; i + 3 < n; i += 4) { + uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)(x + i)); + uint16x8_t v_yd = vec_convert_from_fp16(v_x, 0); + float32x4_t 
v_yh = vec_extend_to_fp32_hi(v_yd, 0); + vec_xst(v_yh, 0, (float *)(y + i)); + } #endif + for (; i < n; ++i) { - y[i] = GGML_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP16_TO_FP32(x[i]); } } @@ -3433,9 +3440,17 @@ int ggml_cpu_has_vxe(void) { #endif } +int ggml_cpu_has_nnpa(void) { +#if defined(GGML_NNPA) + return 1; +#else + return 0; +#endif +} + int ggml_cpu_has_neon(void) { #if defined(__ARM_ARCH) && defined(__ARM_NEON) - return ggml_arm_arch_features.has_neon; + return 1; #else return 0; #endif @@ -3443,7 +3458,7 @@ int ggml_cpu_has_neon(void) { int ggml_cpu_has_dotprod(void) { #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_DOTPROD) - return ggml_arm_arch_features.has_dotprod; + return 1; #else return 0; #endif @@ -3451,7 +3466,7 @@ int ggml_cpu_has_dotprod(void) { int ggml_cpu_has_sve(void) { #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SVE) - return ggml_arm_arch_features.has_sve; + return 1; #else return 0; #endif @@ -3459,7 +3474,7 @@ int ggml_cpu_has_sve(void) { int ggml_cpu_has_matmul_int8(void) { #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_MATMUL_INT8) - return ggml_arm_arch_features.has_i8mm; + return 1; #else return 0; #endif @@ -3475,14 +3490,14 @@ int ggml_cpu_get_sve_cnt(void) { int ggml_cpu_has_sme(void) { #if defined(__ARM_ARCH) && defined(__ARM_FEATURE_SME) - return ggml_arm_arch_features.has_sme; + return 1; #else return 0; #endif } void ggml_cpu_init(void) { - // needed to initialize f16 tables + // needed to initialize ggml_time { struct ggml_init_params params = { 0, NULL, false }; struct ggml_context * ctx = ggml_init(params); @@ -3503,9 +3518,10 @@ void ggml_cpu_init(void) { uint16_t u16; ggml_fp16_t fp16; } u = {i}; - float f = GGML_FP16_TO_FP32(u.fp16); - ggml_table_gelu_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_f32(f)); - ggml_table_gelu_quick_f16[i] = GGML_FP32_TO_FP16(ggml_gelu_quick_f32(f)); + float f = GGML_COMPUTE_FP16_TO_FP32(u.fp16); + ggml_table_f32_f16[i] = f; + ggml_table_gelu_f16[i] = 
GGML_CPU_FP32_TO_FP16(ggml_gelu_f32(f)); + ggml_table_gelu_quick_f16[i] = GGML_CPU_FP32_TO_FP16(ggml_gelu_quick_f32(f)); } const uint64_t t_end = ggml_time_us(); UNUSED(t_end); diff --git a/ggml/src/ggml-cpu/ggml-cpu.cpp b/ggml/src/ggml-cpu/ggml-cpu.cpp index 735ef3f015c13..c9daa4c39e83e 100644 --- a/ggml/src/ggml-cpu/ggml-cpu.cpp +++ b/ggml/src/ggml-cpu/ggml-cpu.cpp @@ -416,6 +416,7 @@ static bool ggml_backend_cpu_device_supports_op(ggml_backend_dev_t dev, const st switch (op->op) { case GGML_OP_CPY: + case GGML_OP_SET_ROWS: return op->type != GGML_TYPE_IQ3_XXS && op->type != GGML_TYPE_IQ3_S && @@ -578,6 +579,9 @@ static ggml_backend_feature * ggml_backend_cpu_get_features(ggml_backend_reg_t r if (ggml_cpu_has_vxe()) { features.push_back({ "VXE", "1" }); } + if (ggml_cpu_has_nnpa()) { + features.push_back({ "NNPA", "1" }); + } if (ggml_cpu_has_wasm_simd()) { features.push_back({ "WASM_SIMD", "1" }); } diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp index 1d46158f928c4..2be54c31b5f3e 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.cpp +++ b/ggml/src/ggml-cpu/llamafile/sgemm.cpp @@ -52,8 +52,8 @@ #include "ggml-impl.h" #include "ggml-cpu-impl.h" #include "ggml-quants.h" +#include "simd-mappings.h" -#include #include #include @@ -63,7 +63,7 @@ #define NOINLINE __attribute__((__noinline__)) #endif -#if defined(__ARM_NEON) || defined(__AVX512F__) +#if defined(__ARM_NEON) || defined(__AVX512F__) || defined(__VXE__) || defined(__VXE2__) #define VECTOR_REGISTERS 32 #else #define VECTOR_REGISTERS 16 @@ -74,7 +74,7 @@ namespace { inline float unhalf(ggml_fp16_t d) { - return GGML_FP16_TO_FP32(d); + return GGML_CPU_FP16_TO_FP32(d); } //////////////////////////////////////////////////////////////////////////////////////////////////// @@ -110,6 +110,12 @@ inline float16x8_t sub(float16x8_t x, float16x8_t y) { return vsubq_f16(x, y); } inline float16x8_t mul(float16x8_t x, float16x8_t y) { return vmulq_f16(x, y); } #endif // 
__ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(__VXE__) || defined(__VXE2__) +inline float32x4_t add(float32x4_t x, float32x4_t y) { return vec_add(x, y); } +inline float32x4_t sub(float32x4_t x, float32x4_t y) { return vec_sub(x, y); } +inline float32x4_t mul(float32x4_t x, float32x4_t y) { return vec_mul(x, y); } +#endif + #if defined(__MMA__) typedef vector unsigned char vec_t; typedef __vector_quad acc_t; @@ -163,6 +169,13 @@ inline float16x8_t madd(float16x8_t a, float16x8_t b, float16x8_t c) { #endif #endif +#if defined(__VXE__) || defined(__VXE2__) +template <> +inline float32x4_t madd(float32x4_t a, float32x4_t b, float32x4_t c) { + return vec_madd(a, b, c); +} +#endif + //////////////////////////////////////////////////////////////////////////////////////////////////// // VECTORIZED HORIZONTAL SUM @@ -179,6 +192,13 @@ inline float hsum(float16x8_t x) { } #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +#if defined(__VXE__) || defined(__VXE2__) +inline float hsum(float32x4_t x) { + float32x4_t tmp = x + vec_reve(x); + return tmp[0] + tmp[1]; +} +#endif + #if defined(__SSE__) || defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) inline float hsum(__m128 x) { #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) @@ -228,6 +248,21 @@ template <> inline float32x4_t load(const ggml_fp16_t *p) { #endif // _MSC_VER #endif // __ARM_NEON +#if defined(__VXE__) || defined(__VXE2__) +template <> inline float32x4_t load(const ggml_fp16_t * p) { + float tmp[4]; + + for (int i = 0; i < 4; i++) { + tmp[i] = GGML_CPU_FP16_TO_FP32(p[i]); + } + + return vec_xl(0, (const float *)(tmp)); +} +template <> inline float32x4_t load(const float * p) { + return vec_xl(0, p); +} +#endif + #if defined(__SSE__) || defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__) template <> inline __m128 load(const float *p) { return _mm_loadu_ps(p); @@ -394,8 +429,6 @@ class tinyBLAS { template NOINLINE void gemm(int64_t m, int64_t n, int64_t BN) { - static 
std::atomic current_chunk; - GGML_ASSERT(m % (RM * BM) == 0); const int64_t ytiles = m / (RM * BM); const int64_t xtiles = (n + RN -1) / RN; @@ -410,7 +443,7 @@ class tinyBLAS { if (params->ith == 0) { GGML_ASSERT( jj_BN * SIZE_BN + (NB_BN - jj_BN) * (SIZE_BN - 1) == xtiles); // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start. - std::atomic_store_explicit(¤t_chunk, (int64_t)params->nth, std::memory_order_relaxed); + ggml_threadpool_chunk_set(params->threadpool, params->nth); } ggml_barrier(params->threadpool); @@ -439,8 +472,7 @@ class tinyBLAS { GGML_ASSERT(jj == jj2); } - // next step. - job = std::atomic_fetch_add_explicit(¤t_chunk, (int64_t)1, std::memory_order_relaxed); + job = ggml_threadpool_chunk_add(params->threadpool, 1); } ggml_barrier(params->threadpool); @@ -1509,7 +1541,7 @@ class tinyBLAS_BF16_PPC { } else if constexpr(RM == 8 && RN == 4) { KERNEL_8x4(ii,jj); } else { - static_assert(false, "RN/RM values not supported"); + assert(false && "RN/RM values not supported"); } } @@ -1541,13 +1573,13 @@ class tinyBLAS_BF16_PPC { const int nth; }; -template +template class tinyBLAS_Q0_PPC { public: tinyBLAS_Q0_PPC(int64_t k, const TA *A, int64_t lda, - const TB *B, int64_t ldb, - TC *C, int64_t ldc, + const block_q8_0 *B, int64_t ldb, + float *C, int64_t ldc, int ith, int nth) : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) { } @@ -1558,8 +1590,7 @@ class tinyBLAS_Q0_PPC { private: - template - inline void save_res(int ii, int jj, int idx, vector float* fin_res) { + inline void save_res(int ii, int jj, int idx, vector float* fin_res, int RM=4, int RN=4) { for (int I = 0; I < RM; I++) { for (int J = 0; J < RN; J++) { *((float*)(C+ii+((jj+J)*ldc)+I)) = *((float*)&fin_res[idx+I]+J); @@ -1579,29 +1610,67 @@ class tinyBLAS_Q0_PPC { fin_res[s_idx+i] = vec_madd(res[i], vs[s_idx+i], fin_res[s_idx+i]); } } - - template - void packNormalInt4(const TA* a, int64_t lda, int 
rows, int cols, VA* vec, std::array& comparray) { - int64_t i, j; - TA *aoffset = NULL; - VA *vecOffset = NULL; - TA *aoffset1 = NULL, *aoffset2 = NULL, *aoffset3 = NULL, *aoffset4 = NULL; - TA *aoffset5 = NULL, *aoffset6 = NULL, *aoffset7 = NULL, *aoffset8 = NULL; - VB c1[2] = {0}, c2[2] = {0}, c3[2] = {0}, c4[2] = {0}; - VB c5[2] = {0}, c6[2] = {0}, c7[2] = {0}, c8[2] = {0}; - VB t1, t2, t3, t4, t5, t6, t7, t8; + /* This function processes quantized data from block_q4_0 elements. + * First the we try to extract the two int4 values stored in single int8_t into two signed int8. + * And then we subtract each of the resultant element with 8, to convert signed int8 to unsigned int8. + * Also compute the rowsum which is required to compensate the above conversion. */ + inline void process_q4_elements(vector signed char (&c)[2], int* ca) { const vector signed char lowMask = vec_splats((signed char)0xF); const vector unsigned char v4 = vec_splats((unsigned char)0x4); const vector signed char v8 = vec_splats((signed char)0x8); - aoffset = const_cast(a); - vecOffset = vec; + vector signed int vsum = {0}; + vector signed int vsum2 = {0}; + c[0] = vec_and(c[1], lowMask); + c[1] = vec_sr(c[1], v4); + c[0] = vec_sub(c[0], v8); + c[1] = vec_sub(c[1], v8); + vsum = vec_sum4s(c[0], vsum); + vsum2 = vec_sum4s(c[1], vsum2); + vsum = vec_add(vsum, vsum2); + *(ca) = vsum[0] + vsum[1] + vsum[2] + vsum[3]; + } + + template + inline void vector_permute_store(V2 &s1, V2 &s2, V2 &s3, V2 &s4, V1 *vecOffset, bool flip) { vector unsigned char swiz1 = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}; vector unsigned char swiz2 = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}; vector unsigned char swiz3 = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}; vector unsigned char swiz4 = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}; - vector signed int vsum = {0}; - vector signed int vsum2 = {0}; + V2 t1, t2, t3, t4, t5, t6, t7, t8; + vector 
unsigned char xor_vector; + uint8_t flip_vec = 0x80; + xor_vector = vec_splats(flip_vec); + t1 = vec_perm(s1, s2, swiz1); + t2 = vec_perm(s1, s2, swiz2); + t3 = vec_perm(s3, s4, swiz1); + t4 = vec_perm(s3, s4, swiz2); + t5 = vec_perm(t1, t3, swiz3); + t6 = vec_perm(t1, t3, swiz4); + t7 = vec_perm(t2, t4, swiz3); + t8 = vec_perm(t2, t4, swiz4); + if (flip == true) { + t5 = vec_xor(t5, xor_vector); + t6 = vec_xor(t6, xor_vector); + t7 = vec_xor(t7, xor_vector); + t8 = vec_xor(t8, xor_vector); + } + vec_xst(t5, 0, vecOffset); + vec_xst(t6, 0, vecOffset+16); + vec_xst(t7, 0, vecOffset+32); + vec_xst(t8, 0, vecOffset+48); + } + template + void packNormalInt4(const TA* a, int64_t lda, int rows, int cols, int8_t* vec, std::array& comparray) { + int64_t i, j; + TA *aoffset = NULL; + int8_t *vecOffset = NULL; + TA *aoffset1 = NULL, *aoffset2 = NULL, *aoffset3 = NULL, *aoffset4 = NULL; + TA *aoffset5 = NULL, *aoffset6 = NULL, *aoffset7 = NULL, *aoffset8 = NULL; + vector signed char c1[2] = {0}, c2[2] = {0}, c3[2] = {0}, c4[2] = {0}; + vector signed char c5[2] = {0}, c6[2] = {0}, c7[2] = {0}, c8[2] = {0}; + aoffset = const_cast(a); + vecOffset = vec; j = (rows >> 3); if (j > 0) { do { @@ -1614,159 +1683,30 @@ class tinyBLAS_Q0_PPC { aoffset7 = aoffset6 + lda; aoffset8 = aoffset7 + lda; aoffset += 8 * lda; - i = (cols >> 2); if (i > 0) { do { - c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); - c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); - c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); - c4[1] = reinterpret_cast(vec_xl(0, aoffset4->qs)); - c5[1] = reinterpret_cast(vec_xl(0, aoffset5->qs)); - c6[1] = reinterpret_cast(vec_xl(0, aoffset6->qs)); - c7[1] = reinterpret_cast(vec_xl(0, aoffset7->qs)); - c8[1] = reinterpret_cast(vec_xl(0, aoffset8->qs)); - - c1[0] = vec_and(c1[1], lowMask); - c1[1] = vec_sr(c1[1], v4); - c1[0] = vec_sub(c1[0], v8); - c1[1] = vec_sub(c1[1], v8); - vsum = vec_sum4s(c1[0], vsum); - vsum2 = vec_sum4s(c1[1], vsum2); - vsum = vec_add(vsum, 
vsum2); - comparray[0] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c2[0] = vec_and(c2[1], lowMask); - c2[1] = vec_sr(c2[1], v4); - c2[0] = vec_sub(c2[0], v8); - c2[1] = vec_sub(c2[1], v8); - vsum = vec_sum4s(c2[0], vsum); - vsum2 = vec_sum4s(c2[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[1] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c3[0] = vec_and(c3[1], lowMask); - c3[1] = vec_sr(c3[1], v4); - c3[0] = vec_sub(c3[0], v8); - c3[1] = vec_sub(c3[1], v8); - vsum = vec_sum4s(c3[0], vsum); - vsum2 = vec_sum4s(c3[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[2] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c4[0] = vec_and(c4[1], lowMask); - c4[1] = vec_sr(c4[1], v4); - c4[0] = vec_sub(c4[0], v8); - c4[1] = vec_sub(c4[1], v8); - vsum = vec_sum4s(c4[0], vsum); - vsum2 = vec_sum4s(c4[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[3] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c5[0] = vec_and(c5[1], lowMask); - c5[1] = vec_sr(c5[1], v4); - c5[0] = vec_sub(c5[0], v8); - c5[1] = vec_sub(c5[1], v8); - vsum = vec_sum4s(c5[0], vsum); - vsum2 = vec_sum4s(c5[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[4] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c6[0] = vec_and(c6[1], lowMask); - c6[1] = vec_sr(c6[1], v4); - c6[0] = vec_sub(c6[0], v8); - c6[1] = vec_sub(c6[1], v8); - vsum = vec_sum4s(c6[0], vsum); - vsum2 = vec_sum4s(c6[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[5] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c7[0] = vec_and(c7[1], lowMask); - c7[1] = vec_sr(c7[1], v4); - c7[0] = vec_sub(c7[0], v8); - c7[1] = vec_sub(c7[1], v8); - vsum = vec_sum4s(c7[0], vsum); - vsum2 = vec_sum4s(c7[1], vsum2); - vsum = vec_add(vsum, vsum2); - 
comparray[6] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c8[0] = vec_and(c8[1], lowMask); - c8[1] = vec_sr(c8[1], v4); - c8[0] = vec_sub(c8[0], v8); - c8[1] = vec_sub(c8[1], v8); - vsum = vec_sum4s(c8[0], vsum); - vsum2 = vec_sum4s(c8[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[7] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - t1 = vec_perm(c1[0], c2[0], swiz1); - t2 = vec_perm(c1[0], c2[0], swiz2); - t3 = vec_perm(c3[0], c4[0], swiz1); - t4 = vec_perm(c3[0], c4[0], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset); - vec_xst(t6, 0, vecOffset+16); - vec_xst(t7, 0, vecOffset+32); - vec_xst(t8, 0, vecOffset+48); - - t1 = vec_perm(c1[1], c2[1], swiz1); - t2 = vec_perm(c1[1], c2[1], swiz2); - t3 = vec_perm(c3[1], c4[1], swiz1); - t4 = vec_perm(c3[1], c4[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset+64); - vec_xst(t6, 0, vecOffset+80); - vec_xst(t7, 0, vecOffset+96); - vec_xst(t8, 0, vecOffset+112); - - t1 = vec_perm(c5[0], c6[0], swiz1); - t2 = vec_perm(c5[0], c6[0], swiz2); - t3 = vec_perm(c7[0], c8[0], swiz1); - t4 = vec_perm(c7[0], c8[0], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset+128); - vec_xst(t6, 0, vecOffset+144); - vec_xst(t7, 0, vecOffset+160); - vec_xst(t8, 0, vecOffset+176); - - t1 = vec_perm(c5[1], c6[1], swiz1); - t2 = vec_perm(c5[1], c6[1], swiz2); - t3 = vec_perm(c7[1], c8[1], swiz1); - t4 = vec_perm(c7[1], c8[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset+192); - vec_xst(t6, 0, 
vecOffset+208); - vec_xst(t7, 0, vecOffset+224); - vec_xst(t8, 0, vecOffset+240); - + c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); + c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); + c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); + c4[1] = reinterpret_cast(vec_xl(0, aoffset4->qs)); + c5[1] = reinterpret_cast(vec_xl(0, aoffset5->qs)); + c6[1] = reinterpret_cast(vec_xl(0, aoffset6->qs)); + c7[1] = reinterpret_cast(vec_xl(0, aoffset7->qs)); + c8[1] = reinterpret_cast(vec_xl(0, aoffset8->qs)); + + process_q4_elements(c1, &comparray[0]); + process_q4_elements(c2, &comparray[1]); + process_q4_elements(c3, &comparray[2]); + process_q4_elements(c4, &comparray[3]); + process_q4_elements(c5, &comparray[4]); + process_q4_elements(c6, &comparray[5]); + process_q4_elements(c7, &comparray[6]); + process_q4_elements(c8, &comparray[7]); + vector_permute_store(c1[0], c2[0], c3[0], c4[0], vecOffset, false); + vector_permute_store(c1[1], c2[1], c3[1], c4[1], vecOffset+64, false); + vector_permute_store(c5[0], c6[0], c7[0], c8[0], vecOffset+128, false); + vector_permute_store(c5[1], c6[1], c7[1], c8[1], vecOffset+192, false); aoffset1 += lda; aoffset2 += lda; aoffset3 += lda; @@ -1789,85 +1729,20 @@ class tinyBLAS_Q0_PPC { aoffset3 = aoffset2 + lda; aoffset4 = aoffset3 + lda; aoffset += 4 * lda; - i = (cols >> 2); if (i > 0) { do { - c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); - c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); - c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); - c4[1] = reinterpret_cast(vec_xl(0, aoffset4->qs)); - - c1[0] = vec_and(c1[1], lowMask); - c1[1] = vec_sr(c1[1], v4); - c1[0] = vec_sub(c1[0], v8); - c1[1] = vec_sub(c1[1], v8); - vsum = vec_sum4s(c1[0], vsum); - vsum2 = vec_sum4s(c1[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[0] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c2[0] = vec_and(c2[1], lowMask); - c2[1] = vec_sr(c2[1], v4); - c2[0] = vec_sub(c2[0], v8); - c2[1] = 
vec_sub(c2[1], v8); - vsum = vec_sum4s(c2[0], vsum); - vsum2 = vec_sum4s(c2[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[1] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c3[0] = vec_and(c3[1], lowMask); - c3[1] = vec_sr(c3[1], v4); - c3[0] = vec_sub(c3[0], v8); - c3[1] = vec_sub(c3[1], v8); - vsum = vec_sum4s(c3[0], vsum); - vsum2 = vec_sum4s(c3[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[2] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c4[0] = vec_and(c4[1], lowMask); - c4[1] = vec_sr(c4[1], v4); - c4[0] = vec_sub(c4[0], v8); - c4[1] = vec_sub(c4[1], v8); - vsum = vec_sum4s(c4[0], vsum); - vsum2 = vec_sum4s(c4[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[3] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats( 0); - - t1 = vec_perm(c1[0], c2[0], swiz1); - t2 = vec_perm(c1[0], c2[0], swiz2); - t3 = vec_perm(c3[0], c4[0], swiz1); - t4 = vec_perm(c3[0], c4[0], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset); - vec_xst(t6, 0, vecOffset+16); - vec_xst(t7, 0, vecOffset+32); - vec_xst(t8, 0, vecOffset+48); - - t1 = vec_perm(c1[1], c2[1], swiz1); - t2 = vec_perm(c1[1], c2[1], swiz2); - t3 = vec_perm(c3[1], c4[1], swiz1); - t4 = vec_perm(c3[1], c4[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset+64); - vec_xst(t6, 0, vecOffset+80); - vec_xst(t7, 0, vecOffset+96); - vec_xst(t8, 0, vecOffset+112); - + c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); + c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); + c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); + c4[1] = reinterpret_cast(vec_xl(0, aoffset4->qs)); + + process_q4_elements(c1, &comparray[0]); + process_q4_elements(c2, 
&comparray[1]); + process_q4_elements(c3, &comparray[2]); + process_q4_elements(c4, &comparray[3]); + vector_permute_store(c1[0], c2[0], c3[0], c4[0], vecOffset, false); + vector_permute_store(c1[1], c2[1], c3[1], c4[1], vecOffset+64, false); aoffset1 += lda; aoffset2 += lda; aoffset3 += lda; @@ -1886,80 +1761,17 @@ class tinyBLAS_Q0_PPC { if (i > 0) { do { switch(rows) { - case 3: c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); - case 2: c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); - case 1: c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); + case 3: c3[1] = reinterpret_cast(vec_xl(0, aoffset3->qs)); + case 2: c2[1] = reinterpret_cast(vec_xl(0, aoffset2->qs)); + case 1: c1[1] = reinterpret_cast(vec_xl(0, aoffset1->qs)); break; } - c1[0] = vec_and(c1[1], lowMask); - c1[1] = vec_sr(c1[1], v4); - c1[0] = vec_sub(c1[0], v8); - c1[1] = vec_sub(c1[1], v8); - vsum = vec_sum4s(c1[0], vsum); - vsum2 = vec_sum4s(c1[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[0] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c2[0] = vec_and(c2[1], lowMask); - c2[1] = vec_sr(c2[1], v4); - c2[0] = vec_sub(c2[0], v8); - c2[1] = vec_sub(c2[1], v8); - vsum = vec_sum4s(c2[0], vsum); - vsum2 = vec_sum4s(c2[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[1] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c3[0] = vec_and(c3[1], lowMask); - c3[1] = vec_sr(c3[1], v4); - c3[0] = vec_sub(c3[0], v8); - c3[1] = vec_sub(c3[1], v8); - vsum = vec_sum4s(c3[0], vsum); - vsum2 = vec_sum4s(c3[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[2] = vsum[0] + vsum[1] + vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - c4[0] = vec_and(c4[1], lowMask); - c4[1] = vec_sr(c4[1], v4); - c4[0] = vec_sub(c4[0], v8); - c4[1] = vec_sub(c4[1], v8); - vsum = vec_sum4s(c4[0], vsum); - vsum2 = vec_sum4s(c4[1], vsum2); - vsum = vec_add(vsum, vsum2); - comparray[3] = vsum[0] + vsum[1] + 
vsum[2] + vsum[3]; - vsum = vec_splats(0); - vsum2 = vec_splats(0); - - t1 = vec_perm(c1[0], c2[0], swiz1); - t2 = vec_perm(c1[0], c2[0], swiz2); - t3 = vec_perm(c3[0], c4[0], swiz1); - t4 = vec_perm(c3[0], c4[0], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset); - vec_xst(t6, 0, vecOffset+16); - vec_xst(t7, 0, vecOffset+32); - vec_xst(t8, 0, vecOffset+48); - - t1 = vec_perm(c1[1], c2[1], swiz1); - t2 = vec_perm(c1[1], c2[1], swiz2); - t3 = vec_perm(c3[1], c4[1], swiz1); - t4 = vec_perm(c3[1], c4[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - vec_xst(t5, 0, vecOffset+64); - vec_xst(t6, 0, vecOffset+80); - vec_xst(t7, 0, vecOffset+96); - vec_xst(t8, 0, vecOffset+112); + process_q4_elements(c1, &comparray[0]); + process_q4_elements(c2, &comparray[1]); + process_q4_elements(c3, &comparray[2]); + process_q4_elements(c4, &comparray[3]); + vector_permute_store(c1[0], c2[0], c3[0], c4[0], vecOffset, false); + vector_permute_store(c1[1], c2[1], c3[1], c4[1], vecOffset+64, false); aoffset1 += lda; aoffset2 += lda; aoffset3 += lda; @@ -1969,146 +1781,40 @@ class tinyBLAS_Q0_PPC { } } } - template - void packNormal(const TB* a, int64_t lda, int rows, int cols, VA* vec, bool flip) { + void packNormal(const block_q8_0* a, int64_t lda, int rows, int cols, VA* vec, bool flip) { int64_t i, j; - TB *aoffset = NULL; + block_q8_0 *aoffset = NULL; VA *vecOffset = NULL; - TB *aoffset1 = NULL, *aoffset2 = NULL, *aoffset3 = NULL, *aoffset4 = NULL; - TB *aoffset5 = NULL, *aoffset6 = NULL, *aoffset7 = NULL, *aoffset8 = NULL; - __vector_pair C1, C2, C3, C4, C5, C6, C7, C8; - VB c1[2] = {0}, c2[2] = {0}, c3[2] = {0}, c4[2]={0}; - VB c5[2] = {0}, c6[2] = {0}, c7[2] = {0}, c8[2]={0}; - VB t1, t2, t3, t4, t5, t6, t7, t8; - vector unsigned char xor_vector; - uint8_t flip_vec = 0x80; - 
xor_vector = vec_splats(flip_vec); - vector unsigned char swiz1 = {0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23}; - vector unsigned char swiz2 = {8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31}; - vector unsigned char swiz3 = {0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27}; - vector unsigned char swiz4 = {4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31}; - - aoffset = const_cast(a); + block_q8_0* aoffsets[8]; + __vector_pair arr[8]; + VB c[8][2] = {0}; + VB c1[8] = {0}; VB c2[8] = {0}; + aoffset = const_cast(a); vecOffset = vec; j = (rows >> 3); if (j > 0) { do { - aoffset1 = aoffset; - aoffset2 = aoffset1 + lda; - aoffset3 = aoffset2 + lda; - aoffset4 = aoffset3 + lda; - aoffset5 = aoffset4 + lda; - aoffset6 = aoffset5 + lda; - aoffset7 = aoffset6 + lda; - aoffset8 = aoffset7 + lda; + aoffsets[0] = aoffset; + for (int it = 1; it < 8; it++) + aoffsets[it] = aoffsets[it-1] + lda; aoffset += 8 * lda; i = (cols >> 3); if (i > 0) { do { - C1 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset1->qs); - C2 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset2->qs); - C3 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset3->qs); - C4 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset4->qs); - C5 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset5->qs); - C6 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset6->qs); - C7 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset7->qs); - C8 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset8->qs); - - __builtin_vsx_disassemble_pair(c1, &C1); - __builtin_vsx_disassemble_pair(c2, &C2); - __builtin_vsx_disassemble_pair(c3, &C3); - __builtin_vsx_disassemble_pair(c4, &C4); - __builtin_vsx_disassemble_pair(c5, &C5); - __builtin_vsx_disassemble_pair(c6, &C6); - __builtin_vsx_disassemble_pair(c7, &C7); - __builtin_vsx_disassemble_pair(c8, &C8); - - t1 = vec_perm(c1[0], c2[0], swiz1); - t2 = vec_perm(c1[0], c2[0], swiz2); - t3 = vec_perm(c3[0], c4[0], swiz1); - t4 = vec_perm(c3[0], c4[0], swiz2); - t5 = vec_perm(t1, t3, 
swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if (flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = vec_xor(t8, xor_vector); - } - vec_xst(t5, 0, vecOffset); - vec_xst(t6, 0, vecOffset+16); - vec_xst(t7, 0, vecOffset+32); - vec_xst(t8, 0, vecOffset+48); - - t1 = vec_perm(c1[1], c2[1], swiz1); - t2 = vec_perm(c1[1], c2[1], swiz2); - t3 = vec_perm(c3[1], c4[1], swiz1); - t4 = vec_perm(c3[1], c4[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if (flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = vec_xor(t8, xor_vector); - } - vec_xst(t5, 0, vecOffset+64); - vec_xst(t6, 0, vecOffset+80); - vec_xst(t7, 0, vecOffset+96); - vec_xst(t8, 0, vecOffset+112); - - t1 = vec_perm(c5[0], c6[0], swiz1); - t2 = vec_perm(c5[0], c6[0], swiz2); - t3 = vec_perm(c7[0], c8[0], swiz1); - t4 = vec_perm(c7[0], c8[0], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if (flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = vec_xor(t8, xor_vector); + for (int it = 0; it < 8; it++) { + arr[it] = __builtin_vsx_lxvp(0, (__vector_pair*)aoffsets[it]->qs); + __builtin_vsx_disassemble_pair(c[it], &arr[it]); + c1[it] = c[it][0]; + c2[it] = c[it][1]; } - vec_xst(t5, 0, vecOffset+128); - vec_xst(t6, 0, vecOffset+144); - vec_xst(t7, 0, vecOffset+160); - vec_xst(t8, 0, vecOffset+176); - - t1 = vec_perm(c5[1], c6[1], swiz1); - t2 = vec_perm(c5[1], c6[1], swiz2); - t3 = vec_perm(c7[1], c8[1], swiz1); - t4 = vec_perm(c7[1], c8[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if 
(flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = vec_xor(t8, xor_vector); - } - vec_xst(t5, 0, vecOffset+192); - vec_xst(t6, 0, vecOffset+208); - vec_xst(t7, 0, vecOffset+224); - vec_xst(t8, 0, vecOffset+240); - - aoffset1 += lda; - aoffset2 += lda; - aoffset3 += lda; - aoffset4 += lda; - aoffset5 += lda; - aoffset6 += lda; - aoffset7 += lda; - aoffset8 += lda; + vector_permute_store(c1[0], c1[1], c1[2], c1[3], vecOffset, flip); + vector_permute_store(c2[0], c2[1], c2[2], c2[3], vecOffset+64, flip); + vector_permute_store(c1[4], c1[5], c1[6], c1[7], vecOffset+128, flip); + vector_permute_store(c2[4], c2[5], c2[6], c2[7], vecOffset+192, flip); + for (int it = 0; it < 8; it++) + aoffsets[it] += lda; vecOffset += 256; i--; } while(i > 0); @@ -2118,129 +1824,53 @@ class tinyBLAS_Q0_PPC { } if (rows & 4) { - aoffset1 = aoffset; - aoffset2 = aoffset1 + lda; - aoffset3 = aoffset2 + lda; - aoffset4 = aoffset3 + lda; - aoffset += 4 * lda; - + aoffsets[0] = aoffset; + for (int it = 1; it < 4; it++ ) + aoffsets[it] = aoffsets[it-1] + lda; + aoffset += 4 * lda; i = (cols >> 3); if (i > 0) { do { - C1 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset1->qs); - C2 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset2->qs); - C3 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset3->qs); - C4 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset4->qs); - - __builtin_vsx_disassemble_pair(c1, &C1); - __builtin_vsx_disassemble_pair(c2, &C2); - __builtin_vsx_disassemble_pair(c3, &C3); - __builtin_vsx_disassemble_pair(c4, &C4); - - t1 = vec_perm(c1[0], c2[0], swiz1); - t2 = vec_perm(c1[0], c2[0], swiz2); - t3 = vec_perm(c3[0], c4[0], swiz1); - t4 = vec_perm(c3[0], c4[0], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if (flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = 
vec_xor(t8, xor_vector); + for (int it = 0; it < 4; it++) { + arr[it] = __builtin_vsx_lxvp(0, (__vector_pair*)aoffsets[it]->qs); + __builtin_vsx_disassemble_pair(c[it], &arr[it]); + c1[it] = c[it][0]; + c2[it] = c[it][1]; } - vec_xst(t5, 0, vecOffset); - vec_xst(t6, 0, vecOffset+16); - vec_xst(t7, 0, vecOffset+32); - vec_xst(t8, 0, vecOffset+48); - - t1 = vec_perm(c1[1], c2[1], swiz1); - t2 = vec_perm(c1[1], c2[1], swiz2); - t3 = vec_perm(c3[1], c4[1], swiz1); - t4 = vec_perm(c3[1], c4[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if (flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = vec_xor(t8, xor_vector); + vector_permute_store(c1[0], c1[1], c1[2], c1[3], vecOffset, flip); + vector_permute_store(c2[0], c2[1], c2[2], c2[3], vecOffset+64, flip); + for (int it = 0; it < 4; it++) { + aoffsets[it] += lda; } - vec_xst(t5, 0, vecOffset+64); - vec_xst(t6, 0, vecOffset+80); - vec_xst(t7, 0, vecOffset+96); - vec_xst(t8, 0, vecOffset+112); - - aoffset1 += lda; - aoffset2 += lda; - aoffset3 += lda; - aoffset4 += lda; vecOffset += 128; i--; } while(i > 0); } } + if (rows & 3) { - aoffset1 = aoffset; - aoffset2 = aoffset1 + lda; - aoffset3 = aoffset2 + lda; + aoffsets[0] = aoffset; + for (int it = 1; it < 3; it++ ) + aoffsets[it] = aoffsets[it-1] + lda; i = (cols >> 3); if (i > 0) { do { switch(rows) { - case 3: C3 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset3->qs); - __builtin_vsx_disassemble_pair(c3, &C3); - case 2: C2 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset2->qs); - __builtin_vsx_disassemble_pair(c2, &C2); - case 1: C1 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset1->qs); - __builtin_vsx_disassemble_pair(c1, &C1); + case 3: arr[2] = __builtin_vsx_lxvp(0, (__vector_pair*)aoffsets[2]->qs); + __builtin_vsx_disassemble_pair(c[2], &arr[2]); + c1[2] = c[2][0]; c2[2] = c[2][1]; + case 2: arr[1] = 
__builtin_vsx_lxvp(0, (__vector_pair*)aoffsets[1]->qs); + __builtin_vsx_disassemble_pair(c[1], &arr[1]); + c1[1] = c[1][0]; c2[1] = c[1][1]; + case 1: arr[0] = __builtin_vsx_lxvp(0, (__vector_pair*)aoffsets[0]->qs); + __builtin_vsx_disassemble_pair(c[0], &arr[0]); + c1[0] = c[0][0]; c2[0] = c[0][1]; break; } - t1 = vec_perm(c1[0], c2[0], swiz1); - t2 = vec_perm(c1[0], c2[0], swiz2); - t3 = vec_perm(c3[0], c4[0], swiz1); - t4 = vec_perm(c3[0], c4[0], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if (flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = vec_xor(t8, xor_vector); - } - vec_xst(t5, 0, vecOffset); - vec_xst(t6, 0, vecOffset+16); - vec_xst(t7, 0, vecOffset+32); - vec_xst(t8, 0, vecOffset+48); - - t1 = vec_perm(c1[1], c2[1], swiz1); - t2 = vec_perm(c1[1], c2[1], swiz2); - t3 = vec_perm(c3[1], c4[1], swiz1); - t4 = vec_perm(c3[1], c4[1], swiz2); - t5 = vec_perm(t1, t3, swiz3); - t6 = vec_perm(t1, t3, swiz4); - t7 = vec_perm(t2, t4, swiz3); - t8 = vec_perm(t2, t4, swiz4); - if (flip == true) { - t5 = vec_xor(t5, xor_vector); - t6 = vec_xor(t6, xor_vector); - t7 = vec_xor(t7, xor_vector); - t8 = vec_xor(t8, xor_vector); - } - vec_xst(t5, 0, vecOffset+64); - vec_xst(t6, 0, vecOffset+80); - vec_xst(t7, 0, vecOffset+96); - vec_xst(t8, 0, vecOffset+112); - - aoffset1 += lda; - aoffset2 += lda; - aoffset3 += lda; + vector_permute_store(c1[0], c1[1], c1[2], c1[3], vecOffset, flip); + vector_permute_store(c2[0], c2[1], c2[2], c2[3], vecOffset+64, flip); + for (int it = 0; it < 3; it++) + aoffsets[it] += lda; vecOffset += 128; i--; } while(i > 0); @@ -2249,159 +1879,42 @@ class tinyBLAS_Q0_PPC { } void mnpack(int64_t m0, int64_t m, int64_t n0, int64_t n) { - int64_t mc, nc, mp, np; - int m_rem = MIN(m - m0, 8); - int n_rem = MIN(n - n0, 8); - // TO-DO: KERNEL_16x8 and KERNEL_8x16 are having some performance - // 
issues. After resolving them, below code will be enabled. - /*if (m_rem >= 16 && n_rem >= 8) { - mc = 16; - nc = 8; - gemm<16,8>(m0, m, n0, n); - } else if(m_rem >= 8 && n_rem >= 16) { - mc = 8; - nc = 16; - gemm<8,16>(m0, m, n0, n); - }*/ + int m_rem = MIN(m - m0, 16); + int n_rem = MIN(n - n0, 16); + + int mc = 0, nc = 0; + if (m_rem >= 8 && n_rem >= 8) { - mc = 8; - nc = 8; - gemm<8,8>(m0, m, n0, n); + mc = 8; + nc = 8; + gemm<8, 8>(m0, m, n0, n); } else if (m_rem >= 4 && n_rem >= 8) { mc = 4; nc = 8; - gemm<4,8>(m0, m, n0, n); + gemm<4, 8>(m0, m, n0, n); } else if (m_rem >= 8 && n_rem >= 4) { mc = 8; nc = 4; - gemm<8,4>(m0, m, n0, n); + gemm<8, 4>(m0, m, n0, n); } else if (m_rem >= 4 && n_rem >= 4) { mc = 4; nc = 4; - gemm_small<4, 4>(m0, m, n0, n); - } else if ((m_rem < 4) && (n_rem > 4)) { - nc = 4; - switch(m_rem) { - case 1: - mc = 1; - gemm_small<1, 4>(m0, m, n0, n); - break; - case 2: - mc = 2; - gemm_small<2, 4>(m0, m, n0, n); - break; - case 3: - mc = 3; - gemm_small<3, 4>(m0, m, n0, n); - break; - default: - return; - } - } else if ((m_rem > 4) && (n_rem < 4)) { - mc = 4; - switch(n_rem) { - case 1: - nc = 1; - gemm_small<4, 1>(m0, m, n0, n); - break; - case 2: - nc = 2; - gemm_small<4, 2>(m0, m, n0, n); - break; - case 3: - nc = 3; - gemm_small<4, 3>(m0, m, n0, n); - break; - default: - return; - } + gemm_small(m0, m, n0, n, mc, nc); } else { - switch((m_rem << 4) | n_rem) { - case 0x43: - mc = 4; - nc = 3; - gemm_small<4, 3>(m0, m, n0, n); - break; - case 0x42: - mc = 4; - nc = 2; - gemm_small<4, 2>(m0, m, n0, n); - break; - case 0x41: - mc = 4; - nc = 1; - gemm_small<4, 1>(m0, m, n0, n); - break; - case 0x34: - mc = 3; - nc = 4; - gemm_small<3, 4>(m0, m, n0, n); - break; - case 0x33: - mc = 3; - nc = 3; - gemm_small<3, 3>(m0, m, n0, n); - break; - case 0x32: - mc = 3; - nc = 2; - gemm_small<3, 2>(m0, m, n0, n); - break; - case 0x31: - mc = 3; - nc = 1; - gemm_small<3, 1>(m0, m, n0, n); - break; - case 0x24: - mc = 2; - nc = 4; - gemm_small<2, 4>(m0, 
m, n0, n); - break; - case 0x23: - mc = 2; - nc = 3; - gemm_small<2, 3>(m0, m, n0, n); - break; - case 0x22: - mc = 2; - nc = 2; - gemm_small<2, 2>(m0, m, n0, n); - break; - case 0x21: - mc = 2; - nc = 1; - gemm_small<2, 1>(m0, m, n0, n); - break; - case 0x14: - mc = 1; - nc = 4; - gemm_small<1, 4>(m0, m, n0, n); - break; - case 0x13: - mc = 1; - nc = 3; - gemm_small<1, 3>(m0, m, n0, n); - break; - case 0x12: - mc = 1; - nc = 2; - gemm_small<1, 2>(m0, m, n0, n); - break; - case 0x11: - mc = 1; - nc = 1; - gemm_small<1, 1>(m0, m, n0, n); - break; - default: - return; - } + mc = (m_rem >= 4) ? 4 : m_rem; + nc = (n_rem >= 4) ? 4 : n_rem; + if (mc == 0 || nc == 0) + return; + gemm_small(m0, m, n0, n, mc, nc); } - mp = m0 + (m - m0) / mc * mc; - np = n0 + (n - n0) / nc * nc; + + int64_t mp = m0 + ((m - m0) / mc) * mc; + int64_t np = n0 + ((n - n0) / nc) * nc; mnpack(mp, m, n0, np); mnpack(m0, m, np, n); } + void KERNEL_4x8(int64_t ii, int64_t jj) { vec_t vec_A[8], vec_B[16] = {0}; acc_t acc_0, acc_1; @@ -2413,9 +1926,9 @@ class tinyBLAS_Q0_PPC { __builtin_mma_xxsetaccz(&acc_0); __builtin_mma_xxsetaccz(&acc_1); if (std::is_same_v) { - packNormalInt4((A+(ii*lda)+l), lda, 4, 4, (int8_t*)vec_A, comparray); + packNormalInt4<4>((A+(ii*lda)+l), lda, 4, 4, (int8_t*)vec_A, comparray); } else { - packNormal((const TB*)(A+(ii*lda)+l), lda, 4, 8, (int8_t*)vec_A, false); + packNormal((const block_q8_0*)(A+(ii*lda)+l), lda, 4, 8, (int8_t*)vec_A, false); } packNormal((B+(jj*ldb)+l), ldb, 8, 8, (uint8_t*)vec_B, true); for(int x = 0; x < 8; x++) { @@ -2443,8 +1956,8 @@ class tinyBLAS_Q0_PPC { compute<4>(&acc_0, 0, 0, comparray, vs, fin_res); compute<4>(&acc_1, 0, 4, comparray, vs, fin_res); } - save_res<4, 4>(ii, jj, 0, fin_res); - save_res<4, 4>(ii, jj+4, 4, fin_res); + save_res(ii, jj, 0, fin_res); + save_res(ii, jj+4, 4, fin_res); } void KERNEL_8x4(int64_t ii, int64_t jj) { @@ -2458,9 +1971,9 @@ class tinyBLAS_Q0_PPC { __builtin_mma_xxsetaccz(&acc_0); __builtin_mma_xxsetaccz(&acc_1); 
if (std::is_same_v) { - packNormalInt4((A+(ii*lda)+l), lda, 8, 4, (int8_t*)vec_A, comparray); + packNormalInt4<8>((A+(ii*lda)+l), lda, 8, 4, (int8_t*)vec_A, comparray); } else { - packNormal((const TB*)(A+(ii*lda)+l), lda, 8, 8, (int8_t*)vec_A, false); + packNormal((const block_q8_0*)(A+(ii*lda)+l), lda, 8, 8, (int8_t*)vec_A, false); } packNormal((B+(jj*ldb)+l), ldb, 4, 8, (uint8_t*)vec_B, true); for(int x = 0; x < 8; x++) { @@ -2487,8 +2000,8 @@ class tinyBLAS_Q0_PPC { compute<8>(&acc_0, 0, 0, comparray, vs, fin_res); compute<8>(&acc_1, 4, 4, comparray, vs, fin_res); } - save_res<4, 4>(ii, jj, 0, fin_res); - save_res<4, 4>(ii+4, jj, 4, fin_res); + save_res(ii, jj, 0, fin_res); + save_res(ii+4, jj, 4, fin_res); } void KERNEL_8x8(int64_t ii, int64_t jj) { @@ -2504,9 +2017,9 @@ class tinyBLAS_Q0_PPC { __builtin_mma_xxsetaccz(&acc_2); __builtin_mma_xxsetaccz(&acc_3); if (std::is_same_v) { - packNormalInt4((A+(ii*lda)+l), lda, 8, 4, (int8_t*)vec_A, comparray); + packNormalInt4<8>((A+(ii*lda)+l), lda, 8, 4, (int8_t*)vec_A, comparray); } else { - packNormal((const TB*)(A+(ii*lda)+l), lda, 8, 8, (int8_t*)vec_A, false); + packNormal((const block_q8_0*)(A+(ii*lda)+l), lda, 8, 8, (int8_t*)vec_A, false); } packNormal((B+(jj*ldb)+l), ldb, 8, 8, (uint8_t*)vec_B, true); for(int x = 0; x < 8; x++) { @@ -2538,14 +2051,13 @@ class tinyBLAS_Q0_PPC { compute<8>(&acc_2, 0, 8, comparray, vs, fin_res); compute<8>(&acc_3, 4, 12, comparray, vs, fin_res); } - save_res<4, 4>(ii, jj, 0, fin_res); - save_res<4, 4>(ii+4, jj, 4, fin_res); - save_res<4, 4>(ii, jj+4, 8, fin_res); - save_res<4, 4>(ii+4, jj+4, 12, fin_res); + save_res(ii, jj, 0, fin_res); + save_res(ii+4, jj, 4, fin_res); + save_res(ii, jj+4, 8, fin_res); + save_res(ii+4, jj+4, 12, fin_res); } - template - void gemm_small(int64_t m0, int64_t m, int64_t n0, int64_t n) { + void gemm_small(int64_t m0, int64_t m, int64_t n0, int64_t n, int RM, int RN) { int64_t ytiles = (m - m0) / RM; int64_t xtiles = (n - n0) / RN; int64_t tiles = 
xtiles * ytiles; @@ -2574,9 +2086,9 @@ class tinyBLAS_Q0_PPC { __builtin_prefetch((B+(jj*ldb)+(l+1))->qs, 0, 1); // prefetch one loop ahead __builtin_mma_xxsetaccz(&acc_0); if (isAblock_q4) { - packNormalInt4((A+(ii*lda)+l), lda, RM, 4, (int8_t*)vec_A, comparray); + packNormalInt4<4>((A+(ii*lda)+l), lda, RM, 4, (int8_t*)vec_A, comparray); } else { - packNormal((const TB*)(A+(ii*lda)+l), lda, RM, 8, (int8_t*)vec_A, false); + packNormal((const block_q8_0*)(A+(ii*lda)+l), lda, RM, 8, (int8_t*)vec_A, false); } packNormal((B+(jj*ldb)+l), ldb, RN, 8, (uint8_t*)vec_B, true); for(int x = 0; x < 8; x+=4) { @@ -2609,7 +2121,7 @@ class tinyBLAS_Q0_PPC { fin_res[i] = vec_madd(res[i], vs[i], fin_res[i]); } } - save_res(ii, jj, 0, fin_res); + save_res(ii, jj, 0, fin_res, RM, RN); } } @@ -2622,7 +2134,7 @@ class tinyBLAS_Q0_PPC { } else if constexpr(RM == 8 && RN == 8) { KERNEL_8x8(ii,jj); } else { - static_assert(false, "RN/RM values not supported"); + assert(false && "RN/RM values not supported"); } } @@ -2644,10 +2156,8 @@ class tinyBLAS_Q0_PPC { } const TA *const A; - const TB *const B; - TC *C; - TA *At; - TB *Bt; + const block_q8_0 *const B; + float *C; const int64_t k; const int64_t lda; const int64_t ldb; @@ -2656,13 +2166,12 @@ class tinyBLAS_Q0_PPC { const int nth; }; -template class tinyBLAS_PPC { public: tinyBLAS_PPC(int64_t k, - const TA *A, int64_t lda, - const TB *B, int64_t ldb, - TC *C, int64_t ldc, + const float *A, int64_t lda, + const float *B, int64_t ldb, + float *C, int64_t ldc, int ith, int nth) : A(A), B(B), C(C), k(k), lda(lda), ldb(ldb), ldc(ldc), ith(ith), nth(nth) { } @@ -2675,247 +2184,139 @@ class tinyBLAS_PPC { void (tinyBLAS_PPC::*kernel)(int64_t, int64_t); - template - void packTranspose(const TA* a, int64_t lda, int rows, int cols, TA* vec) { + inline void vector_permute_store_4(vector float *src, float *vecOffset) { + vector float t1, t2, t3, t4, t5, t6, t7, t8; + t1 = vec_mergeh(src[0], src[1]); + t2 = vec_mergeh(src[2], src[3]); + t3 = 
vec_mergel(src[0], src[1]); + t4 = vec_mergel(src[2], src[3]); + + t5 = vec_xxpermdi(t1, t2, 0); + t6 = vec_xxpermdi(t1, t2, 3); + t7 = vec_xxpermdi(t3, t4, 0); + t8 = vec_xxpermdi(t3, t4, 3); + + vec_xst(t5, 0, vecOffset); + vec_xst(t6, 0, vecOffset + 4); + vec_xst(t7, 0, vecOffset + 8); + vec_xst(t8, 0, vecOffset + 12); + } + + inline void vector_permute_store_8(vector float *src, float *vecOffset) { + vector float t1, t2, t3, t4, t5, t6, t7, t8; + t1 = vec_mergeh(src[0], src[1]); + t2 = vec_mergeh(src[2], src[3]); + t3 = vec_mergeh(src[4], src[5]); + t4 = vec_mergeh(src[6], src[7]); + + t5 = vec_xxpermdi(t1, t2, 0); + t6 = vec_xxpermdi(t3, t4, 0); + t7 = vec_xxpermdi(t1, t2, 3); + t8 = vec_xxpermdi(t3, t4, 3); + + vec_xst(t5, 0, vecOffset); + vec_xst(t6, 0, vecOffset + 4); + vec_xst(t7, 0, vecOffset + 8); + vec_xst(t8, 0, vecOffset + 12); + + t1 = vec_mergel(src[0], src[1]); + t2 = vec_mergel(src[2], src[3]); + t3 = vec_mergel(src[4], src[5]); + t4 = vec_mergel(src[6], src[7]); + + t5 = vec_xxpermdi(t1, t2, 0); + t6 = vec_xxpermdi(t3, t4, 0); + t7 = vec_xxpermdi(t1, t2, 3); + t8 = vec_xxpermdi(t3, t4, 3); + + vec_xst(t5, 0, vecOffset + 16); + vec_xst(t6, 0, vecOffset + 20); + vec_xst(t7, 0, vecOffset + 24); + vec_xst(t8, 0, vecOffset + 28); + } + + void packTranspose(const float* a, int64_t lda, int rows, int cols, float* vec) { int64_t i, j; - TA *aoffset = NULL, *boffset = NULL; - TA *aoffset1 = NULL, *aoffset2 = NULL, *aoffset3 = NULL, *aoffset4 = NULL; - TA *aoffset5 = NULL, *aoffset6 = NULL, *aoffset7 = NULL, *aoffset8 = NULL; - __vector_pair C1, C2, C3, C4, C5, C6, C7, C8; - VA c1[2] = {0}, c2[2] = {0}, c3[2] = {0}, c4[2] = {0}; - VA c5[2] = {0}, c6[2] = {0}, c7[2] = {0}, c8[2] = {0}; - VA t1, t2, t3, t4, t5, t6, t7, t8; - aoffset = const_cast(a); + float * aoffsets[8]; + float *aoffset = NULL, *boffset = NULL; + __vector_pair arr[8]; + vector float c[8][2] = {0}; + vector float c1[8] = {0}; + vector float c2[8] = {0}; + aoffset = const_cast(a); boffset = 
vec; j = (rows >> 3); if (j > 0) { do { - aoffset1 = aoffset; - aoffset2 = aoffset1 + lda; - aoffset3 = aoffset2 + lda; - aoffset4 = aoffset3 + lda; - aoffset5 = aoffset4 + lda; - aoffset6 = aoffset5 + lda; - aoffset7 = aoffset6 + lda; - aoffset8 = aoffset7 + lda; + aoffsets[0] = aoffset; + for (int it = 1; it< 8; it++) + aoffsets[it] = aoffsets[it-1] + lda; aoffset += 8 * lda; i = (cols >> 3); if (i > 0) { do { - C1 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset1); - C2 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset2); - C3 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset3); - C4 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset4); - C5 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset5); - C6 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset6); - C7 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset7); - C8 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset8); - __builtin_vsx_disassemble_pair(c1, &C1); - __builtin_vsx_disassemble_pair(c2, &C2); - __builtin_vsx_disassemble_pair(c3, &C3); - __builtin_vsx_disassemble_pair(c4, &C4); - __builtin_vsx_disassemble_pair(c5, &C5); - __builtin_vsx_disassemble_pair(c6, &C6); - __builtin_vsx_disassemble_pair(c7, &C7); - __builtin_vsx_disassemble_pair(c8, &C8); - - t1 = vec_mergeh(c1[0], c2[0]); - t2 = vec_mergeh(c3[0], c4[0]); - t3 = vec_mergeh(c5[0], c6[0]); - t4 = vec_mergeh(c7[0], c8[0]); - t5 = vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t3, t4, 0); - t7 = vec_xxpermdi(t1, t2, 3); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset); - vec_xst(t6, 0, boffset+4); - vec_xst(t7, 0, boffset+8); - vec_xst(t8, 0, boffset+12); - - t1 = vec_mergel(c1[0], c2[0]); - t2 = vec_mergel(c3[0], c4[0]); - t3 = vec_mergel(c5[0], c6[0]); - t4 = vec_mergel(c7[0], c8[0]); - t5 = vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t3, t4, 0); - t7 = vec_xxpermdi(t1, t2, 3); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset+16); - vec_xst(t6, 0, boffset+20); - vec_xst(t7, 0, boffset+24); - vec_xst(t8, 0, boffset+28); - - t1 = vec_mergeh(c1[1], 
c2[1]); - t2 = vec_mergeh(c3[1], c4[1]); - t3 = vec_mergeh(c5[1], c6[1]); - t4 = vec_mergeh(c7[1], c8[1]); - t5 = vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t3, t4, 0); - t7 = vec_xxpermdi(t1, t2, 3); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset+32); - vec_xst(t6, 0, boffset+36); - vec_xst(t7, 0, boffset+40); - vec_xst(t8, 0, boffset+44); - - t1 = vec_mergel(c1[1], c2[1]); - t2 = vec_mergel(c3[1], c4[1]); - t3 = vec_mergel(c5[1], c6[1]); - t4 = vec_mergel(c7[1], c8[1]); - t5 = vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t3, t4, 0); - t7 = vec_xxpermdi(t1, t2, 3); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset+48); - vec_xst(t6, 0, boffset+52); - vec_xst(t7, 0, boffset+56); - vec_xst(t8, 0, boffset+60); - - aoffset1 += 8*lda; - aoffset2 += 8*lda; - aoffset3 += 8*lda; - aoffset4 += 8*lda; + for (int it = 0; it< 8; it++) { + arr[it] = __builtin_vsx_lxvp(0, (__vector_pair*)aoffsets[it]); + __builtin_vsx_disassemble_pair(c[it], &arr[it]); + c1[it] = c[it][0]; + c2[it] = c[it][1]; + } + + vector_permute_store_8(c1, boffset); + vector_permute_store_8(c2, boffset+32); + for (int it = 0; it < 4; it++) + aoffsets[it] = aoffsets[it] + 8*lda; boffset += 64; i--; } while(i > 0); } if (cols & 4) { - c1[0] = vec_xl(0, aoffset1); - c2[0] = vec_xl(0, aoffset2); - c3[0] = vec_xl(0, aoffset3); - c4[0] = vec_xl(0, aoffset4); - c5[0] = vec_xl(0, aoffset5); - c6[0] = vec_xl(0, aoffset6); - c7[0] = vec_xl(0, aoffset7); - c8[0] = vec_xl(0, aoffset8); - - t1 = vec_mergeh(c1[0], c2[0]); - t2 = vec_mergeh(c3[0], c4[0]); - t3 = vec_mergeh(c5[0], c6[0]); - t4 = vec_mergeh(c7[0], c8[0]); - t5 = vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t3, t4, 0); - t7 = vec_xxpermdi(t1, t2, 3); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset); - vec_xst(t6, 0, boffset+4); - vec_xst(t7, 0, boffset+8); - vec_xst(t8, 0, boffset+12); - - t1 = vec_mergel(c1[0], c2[0]); - t2 = vec_mergel(c3[0], c4[0]); - t3 = vec_mergel(c5[0], c6[0]); - t4 = vec_mergel(c7[0], c8[0]); - t5 = 
vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t3, t4, 0); - t7 = vec_xxpermdi(t1, t2, 3); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset+16); - vec_xst(t6, 0, boffset+20); - vec_xst(t7, 0, boffset+24); - vec_xst(t8, 0, boffset+28); + for (int it = 0; it < 8 ; it++) + c1[it] = vec_xl(0, aoffsets[it]); + vector_permute_store_8(c1, boffset); } j--; } while(j > 0); } if (rows & 4) { - aoffset1 = aoffset; - aoffset2 = aoffset1 + lda; - aoffset3 = aoffset2 + lda; - aoffset4 = aoffset3 + lda; + aoffsets[0] = aoffset; + for (int it = 1; it < 4; it++) + aoffsets[it] = aoffsets[it-1] + lda; aoffset += 4 * lda; i = (cols >> 3); if (i > 0) { do { - C1 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset1); - C2 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset2); - C3 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset3); - C4 = __builtin_vsx_lxvp(0, (__vector_pair*)aoffset4); - __builtin_vsx_disassemble_pair(c1, &C1); - __builtin_vsx_disassemble_pair(c2, &C2); - __builtin_vsx_disassemble_pair(c3, &C3); - __builtin_vsx_disassemble_pair(c4, &C4); - - t1 = vec_mergeh(c1[0], c2[0]); - t2 = vec_mergeh(c3[0], c4[0]); - t3 = vec_mergel(c1[0], c2[0]); - t4 = vec_mergel(c3[0], c4[0]); - t5 = vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t1, t2, 3); - t7 = vec_xxpermdi(t3, t4, 0); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset); - vec_xst(t6, 0, boffset+4); - vec_xst(t7, 0, boffset+8); - vec_xst(t8, 0, boffset+12); - - t1 = vec_mergeh(c1[1], c2[1]); - t2 = vec_mergeh(c3[1], c4[1]); - t3 = vec_mergel(c1[1], c2[1]); - t4 = vec_mergel(c3[1], c4[1]); - t5 = vec_xxpermdi(t1, t2, 0); - t6 = vec_xxpermdi(t1, t2, 3); - t7 = vec_xxpermdi(t3, t4, 0); - t8 = vec_xxpermdi(t3, t4, 3); - vec_xst(t5, 0, boffset+16); - vec_xst(t6, 0, boffset+20); - vec_xst(t7, 0, boffset+24); - vec_xst(t8, 0, boffset+28); - - aoffset1 += 8*lda; - aoffset2 += 8*lda; - aoffset3 += 8*lda; - aoffset4 += 8*lda; + for (int it = 0; it < 4; it++) { + arr[it] = __builtin_vsx_lxvp(0, (__vector_pair*)aoffsets[it]); + 
__builtin_vsx_disassemble_pair(c[it], &arr[it]); + c1[it] = c[it][0]; + c2[it] = c[it][1]; + } + vector_permute_store_4(c1, boffset); + vector_permute_store_4(c2, boffset+16); + for (int it = 0; it < 4; it++) + aoffsets[it] += 8*lda; boffset += 32; i--; } while(i > 0); } if (cols & 4) { - c1[0] = vec_xl(0, aoffset1); - c2[0] = vec_xl(0, aoffset2); - c3[0] = vec_xl(0, aoffset3); - c4[0] = vec_xl(0, aoffset4); - - t1 = vec_mergeh(c1[0], c2[0]); - t2 = vec_mergeh(c3[0], c4[0]); - t3 = vec_xxpermdi(t1, t2, 0); - t4 = vec_xxpermdi(t1, t2, 3); - vec_xst(t3, 0, boffset); - vec_xst(t4, 0, boffset+4); - - t1 = vec_mergel(c1[0], c2[0]); - t2 = vec_mergel(c3[0], c4[0]); - t3 = vec_xxpermdi(t1, t2, 0); - t4 = vec_xxpermdi(t1, t2, 3); - vec_xst(t3, 0, boffset+8); - vec_xst(t4, 0, boffset+12); + for (int it = 0; it < 4; it++) + c1[it] = vec_xl(0, aoffsets[it]); + vector_permute_store_4(c1, boffset); } } if (rows & 3) { - aoffset1 = aoffset; - aoffset2 = aoffset1 + lda; - aoffset3 = aoffset2 + lda; + aoffsets[0] = aoffset; + for (int it = 1; it < 3; it++) + aoffsets[it] = aoffsets[it-1] + lda; if (cols & 4) { - c1[0] = vec_xl(0, aoffset1); - c2[0] = vec_xl(0, aoffset2); - c3[0] = vec_xl(0, aoffset3); - - t1 = vec_mergeh(c1[0], c2[0]); - t2 = vec_mergeh(c3[0], c4[0]); - t3 = vec_xxpermdi(t1, t2, 0); - t4 = vec_xxpermdi(t1, t2, 3); - vec_xst(t3, 0, boffset); - vec_xst(t4, 0, boffset+4); - - t1 = vec_mergel(c1[0], c2[0]); - t2 = vec_mergel(c3[0], c4[0]); - t3 = vec_xxpermdi(t1, t2, 0); - t4 = vec_xxpermdi(t1, t2, 3); - vec_xst(t3, 0, boffset+8); - vec_xst(t4, 0, boffset+12); + for (int it = 0; it < 3; it++) + c1[it] = vec_xl(0, aoffsets[it]); + vector_permute_store_4(c1, boffset); } } } @@ -2925,8 +2326,8 @@ class tinyBLAS_PPC { acc_t acc_0; __builtin_mma_xxsetaccz(&acc_0); for (int l = 0; l < k; l+=4) { - packTranspose(A+(ii*lda)+l, lda, 4, 4, (TA*)vec_A); - packTranspose(B+(jj*ldb)+l, ldb, 4, 4, (TA*)vec_B); + packTranspose(A+(ii*lda)+l, lda, 4, 4, (float*)vec_A); + 
packTranspose(B+(jj*ldb)+l, ldb, 4, 4, (float*)vec_B); __builtin_mma_xvf32gerpp(&acc_0, vec_A[0], vec_B[0]); __builtin_mma_xvf32gerpp(&acc_0, vec_A[1], vec_B[1]); __builtin_mma_xvf32gerpp(&acc_0, vec_A[2], vec_B[2]); @@ -2941,8 +2342,8 @@ class tinyBLAS_PPC { __builtin_mma_xxsetaccz(&acc_0); __builtin_mma_xxsetaccz(&acc_1); for (int64_t l = 0; l < k; l+=4) { - packTranspose(A+(ii*lda)+l, lda, 4, 4, (TA*)vec_A); - packTranspose(B+(jj*ldb)+l, ldb, 8, 4, (TA*)vec_B); + packTranspose(A+(ii*lda)+l, lda, 4, 4, (float*)vec_A); + packTranspose(B+(jj*ldb)+l, ldb, 8, 4, (float*)vec_B); __builtin_mma_xvf32gerpp(&acc_0, vec_A[0], (vec_t)vec_B[0]); __builtin_mma_xvf32gerpp(&acc_1, vec_A[0], (vec_t)vec_B[1]); __builtin_mma_xvf32gerpp(&acc_0, vec_A[1], (vec_t)vec_B[2]); @@ -2962,8 +2363,8 @@ class tinyBLAS_PPC { __builtin_mma_xxsetaccz(&acc_0); __builtin_mma_xxsetaccz(&acc_1); for (int64_t l = 0; l < k; l+=4) { - packTranspose(A+(ii*lda)+l, lda, 8, 4, (TA*)vec_A); - packTranspose(B+(jj*ldb)+l, ldb, 4, 4, (TA*)vec_B); + packTranspose(A+(ii*lda)+l, lda, 8, 4, (float*)vec_A); + packTranspose(B+(jj*ldb)+l, ldb, 4, 4, (float*)vec_B); __builtin_mma_xvf32gerpp(&acc_0, (vec_t)vec_A[0], vec_B[0]); __builtin_mma_xvf32gerpp(&acc_1, (vec_t)vec_A[1], vec_B[0]); __builtin_mma_xvf32gerpp(&acc_0, (vec_t)vec_A[2], vec_B[1]); @@ -2985,8 +2386,8 @@ class tinyBLAS_PPC { __builtin_mma_xxsetaccz(&acc_2); __builtin_mma_xxsetaccz(&acc_3); for (int l = 0; l < k; l+=8) { - packTranspose(A+(ii*lda)+l, lda, 8, 8, (TA*)vec_A); - packTranspose(B+(jj*ldb)+l, ldb, 8, 8, (TA*)vec_B); + packTranspose(A+(ii*lda)+l, lda, 8, 8, (float*)vec_A); + packTranspose(B+(jj*ldb)+l, ldb, 8, 8, (float*)vec_B); for(int x = 0; x < 16; x+=2) { __builtin_mma_xvf32gerpp(&acc_0, (vec_t)vec_A[x], vec_B[x]); __builtin_mma_xvf32gerpp(&acc_1, (vec_t)vec_A[x], vec_B[x+1]); @@ -3001,155 +2402,37 @@ class tinyBLAS_PPC { } void mnpack(int64_t m0, int64_t m, int64_t n0, int64_t n) { - int64_t mc, nc, mp, np; - int m_rem = MIN(m - m0, 16); - 
int n_rem = MIN(n - n0, 16); - if (m_rem >= 16 && n_rem >= 8) { - mc = 8; - nc = 8; - gemm<8,8>(m0, m, n0, n); - } else if(m_rem >= 8 && n_rem >= 16) { - mc = 8; - nc = 8; - gemm<8,8>(m0, m, n0, n); - } else if (m_rem >= 8 && n_rem >= 8) { - mc = 8; - nc = 8; - gemm<8,8>(m0, m, n0, n); + int m_rem = MIN(m - m0, 8); + int n_rem = MIN(n - n0, 8); + int mc = 0, nc = 0; + if (m_rem >= 8 && n_rem >= 8) { + mc = 8; + nc = 8; + gemm<8, 8>(m0, m, n0, n); } else if (m_rem >= 4 && n_rem >= 8) { - mc = 4; - nc = 8; - gemm<4,8>(m0, m, n0, n); + mc = 4; + nc = 8; + gemm<4, 8>(m0, m, n0, n); } else if (m_rem >= 8 && n_rem >= 4) { - mc = 8; - nc = 4; - gemm<8,4>(m0, m, n0, n); + mc = 8; + nc = 4; + gemm<8, 4>(m0, m, n0, n); } else if (m_rem >= 4 && n_rem >= 4) { - mc = 4; - nc = 4; - gemm<4,4>(m0, m, n0, n); - } else if ((m_rem < 4) && (n_rem > 4)) { - nc = 4; - switch(m_rem) { - case 1: - mc = 1; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 2: - mc = 2; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 3: - mc = 3; - gemm_small(m0, m, n0, n, mc, nc); - break; - default: - return; - } - } else if ((m_rem > 4) && (n_rem < 4)) { - mc = 4; - switch(n_rem) { - case 1: - nc = 1; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 2: - nc = 2; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 3: - nc = 3; - gemm_small(m0, m, n0, n, mc, nc); - break; - default: - return; - } + mc = 4; + nc = 4; + gemm<4, 4>(m0, m, n0, n); } else { - switch((m_rem << 4) | n_rem) { - case 0x43: - mc = 4; - nc = 3; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x42: - mc = 4; - nc = 2; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x41: - mc = 4; - nc = 1; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x34: - mc = 3; - nc = 4; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x33: - mc = 3; - nc = 3; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x32: - mc = 3; - nc = 2; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x31: - mc = 3; - nc = 1; - gemm_small(m0, m, 
n0, n, mc, nc); - break; - case 0x24: - mc = 2; - nc = 4; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x23: - mc = 2; - nc = 3; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x22: - mc = 2; - nc = 2; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x21: - mc = 2; - nc = 1; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x14: - mc = 1; - nc = 4; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x13: - mc = 1; - nc = 3; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x12: - mc = 1; - nc = 2; - gemm_small(m0, m, n0, n, mc, nc); - break; - case 0x11: - mc = 1; - nc = 1; - gemm_small(m0, m, n0, n, mc, nc); - break; - default: - return; - } + mc = (m_rem >= 4) ? 4 : m_rem; + nc = (n_rem >= 4) ? 4 : n_rem; + if (mc == 0 || nc == 0) + return; + gemm_small(m0, m, n0, n, mc, nc); } - mp = m0 + (m - m0) / mc * mc; - np = n0 + (n - n0) / nc * nc; + int64_t mp = m0 + ((m - m0) / mc) * mc; + int64_t np = n0 + ((n - n0) / nc) * nc; mnpack(mp, m, n0, np); mnpack(m0, m, np, n); - } + } void gemm_small(int64_t m0, int64_t m, int64_t n0, int64_t n, int RM, int RN) { int64_t ytiles = (m - m0) / RM; @@ -3174,22 +2457,22 @@ class tinyBLAS_PPC { * matrix elements. 
*/ if (RM == 1) { - TA* a = const_cast(A+(ii)*lda+l); - packTranspose(B+(jj*ldb)+l, ldb, RN, 4, (TA*)vec_B); + float* a = const_cast(A+(ii)*lda+l); + packTranspose(B+(jj*ldb)+l, ldb, RN, 4, (float*)vec_B); vec_A[0] = (vec_t)vec_xl(0,a); - vec_A[1] = (vec_t)vec_splats(*((TA*)&vec_A+1)); - vec_A[2] = (vec_t)vec_splats(*((TA*)&vec_A+2)); - vec_A[3] = (vec_t)vec_splats(*((TA*)&vec_A+3)); + vec_A[1] = (vec_t)vec_splats(*((float*)&vec_A+1)); + vec_A[2] = (vec_t)vec_splats(*((float*)&vec_A+2)); + vec_A[3] = (vec_t)vec_splats(*((float*)&vec_A+3)); } else if (RN == 1) { - packTranspose(A+(ii*lda)+l, lda, RM, 4, (TA*)vec_A); - TB* b = const_cast(B+(jj)*ldb+l); + packTranspose(A+(ii*lda)+l, lda, RM, 4, (float*)vec_A); + float* b = const_cast(B+(jj)*ldb+l); vec_B[0] = (vec_t)vec_xl(0,b); - vec_B[1] = (vec_t)vec_splats(*((TB*)&vec_B+1)); - vec_B[2] = (vec_t)vec_splats(*((TB*)&vec_B+2)); - vec_B[3] = (vec_t)vec_splats(*((TB*)&vec_B+3)); + vec_B[1] = (vec_t)vec_splats(*((float*)&vec_B+1)); + vec_B[2] = (vec_t)vec_splats(*((float*)&vec_B+2)); + vec_B[3] = (vec_t)vec_splats(*((float*)&vec_B+3)); } else { - packTranspose(A+(ii*lda)+l, lda, RM, 4, (TA*)vec_A); - packTranspose(B+(jj*ldb)+l, ldb, RN, 4, (TA*)vec_B); + packTranspose(A+(ii*lda)+l, lda, RM, 4, (float*)vec_A); + packTranspose(B+(jj*ldb)+l, ldb, RN, 4, (float*)vec_B); } __builtin_mma_xvf32gerpp(&acc_0, vec_A[0], vec_B[0]); __builtin_mma_xvf32gerpp(&acc_0, vec_A[1], vec_B[1]); @@ -3199,7 +2482,7 @@ class tinyBLAS_PPC { __builtin_mma_disassemble_acc(vec_C, &acc_0); for (int I = 0; I < RM; I++) { for (int J = 0; J < RN; J++) { - *((TC*)(C+ii+((jj+J)*ldc)+I)) = *((TC*)&vec_C[I]+J); + *((float*)(C+ii+((jj+J)*ldc)+I)) = *((float*)&vec_C[I]+J); } } } @@ -3231,11 +2514,9 @@ class tinyBLAS_PPC { } } - const TA *const A; - const TB *const B; - TC *C; - TA *At; - TB *Bt; + const float *const A; + const float *const B; + float *C; const int64_t k; const int64_t lda; const int64_t ldb; @@ -3323,10 +2604,18 @@ bool llamafile_sgemm(const 
struct ggml_compute_params * params, int64_t m, int64 (const float *)B, ldb, (float *)C, ldc}; return tb.matmul(m, n); +#elif defined(__VXE__) || defined(__VXE2__) + if (n < 4) + return false; + tinyBLAS<4, float32x4_t, float32x4_t, float, float, float> tb{ params, + k, (const float *)A, lda, + (const float *)B, ldb, + (float *)C, ldc}; + return tb.matmul(m, n); #elif defined(__MMA__) if (k % 8) return false; - tinyBLAS_PPC tb{ + tinyBLAS_PPC tb{ k, (const float *)A, lda, (const float *)B, ldb, (float *)C, ldc, @@ -3414,6 +2703,16 @@ bool llamafile_sgemm(const struct ggml_compute_params * params, int64_t m, int64 (float *)C, ldc}; return tb.matmul(m, n); } +#elif defined(__VXE__) || defined(__VXE2__) + if (n < 4) + return false; + if (Btype == GGML_TYPE_F16) { + tinyBLAS<4, float32x4_t, float32x4_t, ggml_fp16_t, ggml_fp16_t, float> tb{ params, + k, (const ggml_fp16_t *)A, lda, + (const ggml_fp16_t *)B, ldb, + (float *)C, ldc}; + return tb.matmul(m, n); + } #endif return false; } @@ -3443,7 +2742,7 @@ bool llamafile_sgemm(const struct ggml_compute_params * params, int64_t m, int64 return false; if (m < 8 && m != 4) return false; - tinyBLAS_Q0_PPC tb{ + tinyBLAS_Q0_PPC tb{ k, (const block_q8_0 *)A, lda, (const block_q8_0 *)B, ldb, (float *)C, ldc, @@ -3480,7 +2779,7 @@ bool llamafile_sgemm(const struct ggml_compute_params * params, int64_t m, int64 return false; if (m < 8 && m != 4) return false; - tinyBLAS_Q0_PPC tb{ + tinyBLAS_Q0_PPC tb{ k, (const block_q4_0 *)A, lda, (const block_q8_0 *)B, ldb, (float *)C, ldc, diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.h b/ggml/src/ggml-cpu/llamafile/sgemm.h index 3d2909515242a..729e8853d516c 100644 --- a/ggml/src/ggml-cpu/llamafile/sgemm.h +++ b/ggml/src/ggml-cpu/llamafile/sgemm.h @@ -1,6 +1,11 @@ #pragma once #include #include + +#if defined(__VXE__) || defined(__VXE2__) +#include +#endif + #ifdef __cplusplus extern "C" { #endif diff --git a/ggml/src/ggml-cpu/ops.cpp b/ggml/src/ggml-cpu/ops.cpp index 
08facb6d03d5e..6581d27adde2e 100644 --- a/ggml/src/ggml-cpu/ops.cpp +++ b/ggml/src/ggml-cpu/ops.cpp @@ -3,6 +3,7 @@ #include "ggml-cpu.h" #include "ggml-impl.h" #include "binary-ops.h" +#include "ggml.h" #include "unary-ops.h" #include "vec.h" @@ -108,7 +109,7 @@ static void ggml_compute_forward_dup_f16( for (int i01 = ir0; i01 < ir1; i01++) { const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); for (int i00 = 0; i00 < ne00; i00++) { - dst_ptr[id] = GGML_FP16_TO_FP32(src0_ptr[i00]); + dst_ptr[id] = GGML_CPU_FP16_TO_FP32(src0_ptr[i00]); id++; } } @@ -130,7 +131,7 @@ static void ggml_compute_forward_dup_f16( const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); for (int i00 = 0; i00 < ne00; i00++) { - src0_f32[i00] = GGML_FP16_TO_FP32(src0_ptr[i00]); + src0_f32[i00] = GGML_CPU_FP16_TO_FP32(src0_ptr[i00]); } quantize_row_q(src0_f32, dst_ptr + id, ne00); @@ -156,7 +157,7 @@ static void ggml_compute_forward_dup_f16( for (int i00 = 0; i00 < ne00; i00++) { const ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); - dst_ptr[id] = GGML_FP16_TO_FP32(*src0_ptr); + dst_ptr[id] = GGML_CPU_FP16_TO_FP32(*src0_ptr); id++; } } @@ -267,7 +268,7 @@ static void ggml_compute_forward_dup_f16( const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); - *(float *) dst_ptr = GGML_FP16_TO_FP32(*(const ggml_fp16_t *) src0_ptr); + *(float *) dst_ptr = GGML_CPU_FP16_TO_FP32(*(const ggml_fp16_t *) src0_ptr); if (++i10 == ne0) { i10 = 0; @@ -372,7 +373,7 @@ static void ggml_compute_forward_dup_bf16( for (int i01 = ir0; i01 < ir1; i01++) { const ggml_bf16_t * src0_ptr = (ggml_bf16_t *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); for (int i00 = 0; i00 < ne00; i00++) { - dst_ptr[id] = 
GGML_FP32_TO_FP16(GGML_BF16_TO_FP32(src0_ptr[i00])); + dst_ptr[id] = GGML_CPU_FP32_TO_FP16(GGML_BF16_TO_FP32(src0_ptr[i00])); id++; } } @@ -473,7 +474,7 @@ static void ggml_compute_forward_dup_bf16( for (int i00 = 0; i00 < ne00; i00++) { const ggml_bf16_t * src0_ptr = (ggml_bf16_t *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); - dst_ptr[id] = GGML_FP32_TO_FP16(GGML_BF16_TO_FP32(*src0_ptr)); + dst_ptr[id] = GGML_CPU_FP32_TO_FP16(GGML_BF16_TO_FP32(*src0_ptr)); id++; } } @@ -566,7 +567,7 @@ static void ggml_compute_forward_dup_bf16( const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); - *(ggml_fp16_t *) dst_ptr = GGML_FP32_TO_FP16(GGML_BF16_TO_FP32(*(const ggml_bf16_t *) src0_ptr)); + *(ggml_fp16_t *) dst_ptr = GGML_CPU_FP32_TO_FP16(GGML_BF16_TO_FP32(*(const ggml_bf16_t *) src0_ptr)); if (++i10 == ne0) { i10 = 0; @@ -696,24 +697,8 @@ static void ggml_compute_forward_dup_f32( if (ggml_is_contiguous(dst)) { // TODO: simplify if (nb00 == sizeof(float)) { - if (dst->type == GGML_TYPE_F32) { - size_t id = 0; - const size_t rs = ne00 * nb00; - char * dst_ptr = (char *) dst->data; - - for (int i03 = 0; i03 < ne03; i03++) { - for (int i02 = 0; i02 < ne02; i02++) { - id += rs * ir0; - for (int i01 = ir0; i01 < ir1; i01++) { - const char * src0_ptr = (char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03; - memcpy(dst_ptr + id, src0_ptr, rs); - id += rs; - } - id += rs * (ne01 - ir1); - } - } - } else if (ggml_get_type_traits_cpu(dst->type)->from_float) { - ggml_from_float_t const quantize_row_q = ggml_get_type_traits_cpu(dst->type)->from_float; + if (ggml_get_type_traits_cpu(dst->type)->from_float) { + ggml_from_float_t const from_float = ggml_get_type_traits_cpu(dst->type)->from_float; size_t id = 0; size_t rs = nb0 * (ne00 / ggml_blck_size(dst->type)); @@ -724,7 +709,7 @@ static void ggml_compute_forward_dup_f32( id += rs * ir0; for (int 
i01 = ir0; i01 < ir1; i01++) { const float * src0_ptr = (float *) ((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); - quantize_row_q(src0_ptr, dst_ptr + id, ne00); + from_float(src0_ptr, dst_ptr + id, ne00); id += rs; } id += rs * (ne01 - ir1); @@ -765,7 +750,7 @@ static void ggml_compute_forward_dup_f32( for (int i00 = 0; i00 < ne00; i00++) { const float * src0_ptr = (float *) ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); - dst_ptr[id] = GGML_FP32_TO_FP16(*src0_ptr); + dst_ptr[id] = GGML_CPU_FP32_TO_FP16(*src0_ptr); id++; } } @@ -878,7 +863,7 @@ static void ggml_compute_forward_dup_f32( const char * src0_ptr = ((char *) src0->data + i00*nb00 + i01*nb01 + i02*nb02 + i03*nb03); char * dst_ptr = ((char *) dst->data + i10*nb0 + i11*nb1 + i12*nb2 + i13*nb3); - *(ggml_fp16_t *) dst_ptr = GGML_FP32_TO_FP16(*(const float *) src0_ptr); + *(ggml_fp16_t *) dst_ptr = GGML_CPU_FP32_TO_FP16(*(const float *) src0_ptr); if (++i10 == ne0) { i10 = 0; @@ -1419,7 +1404,7 @@ static void ggml_compute_forward_add1_f16_f32( ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); for (int i = 0; i < ne0; i++) { - dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + v); + dst_ptr[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(src0_ptr[i]) + v); } } } @@ -1435,7 +1420,7 @@ static void ggml_compute_forward_add1_f16_f16( GGML_ASSERT(ggml_is_scalar(src1)); // scalar to add - const float v = GGML_FP16_TO_FP32(*(ggml_fp16_t *) src1->data); + const float v = GGML_CPU_FP16_TO_FP32(*(ggml_fp16_t *) src1->data); const int ith = params->ith; const int nth = params->nth; @@ -1467,7 +1452,7 @@ static void ggml_compute_forward_add1_f16_f16( ggml_fp16_t * dst_ptr = (ggml_fp16_t *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ); ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01); for (int i = 0; i 
< ne0; i++) { - dst_ptr[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(src0_ptr[i]) + v); + dst_ptr[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(src0_ptr[i]) + v); } } } @@ -1889,7 +1874,7 @@ static void ggml_compute_forward_sum_f16( } } } - ((ggml_fp16_t *) dst->data)[0] = GGML_FP32_TO_FP16(sum); + ((ggml_fp16_t *) dst->data)[0] = GGML_CPU_FP32_TO_FP16(sum); } static void ggml_compute_forward_sum_bf16( @@ -2300,6 +2285,12 @@ void ggml_compute_forward_repeat( { ggml_compute_forward_repeat_f32(params, dst); } break; + // TODO: templateify the implemenation and support for I64 + // ref https://github.com/ggml-org/llama.cpp/pull/14274#discussion_r2169492225 + //case GGML_TYPE_I64: + // { + // ggml_compute_forward_repeat_i64(params, dst); + // } break; default: { GGML_ABORT("fatal error"); @@ -2660,7 +2651,7 @@ static void ggml_compute_forward_gelu_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; - const float v = GGML_FP16_TO_FP32(x); + const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); @@ -2763,7 +2754,7 @@ static void ggml_compute_forward_gelu_erf_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; - const float v = GGML_FP16_TO_FP32(x); + const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); @@ -2866,7 +2857,7 @@ static void ggml_compute_forward_gelu_quick_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; - const float v = GGML_FP16_TO_FP32(x); + const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); @@ -2969,7 +2960,7 @@ static void ggml_compute_forward_silu_f16( #ifndef NDEBUG for (int k = 0; k < nc; k++) { const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])))[k]; - 
const float v = GGML_FP16_TO_FP32(x); + const float v = GGML_CPU_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); @@ -3144,8 +3135,718 @@ static void ggml_compute_forward_silu_back_f16( const int ith = params->ith; const int nth = params->nth; - const int nc = src1->ne[0]; - const int nr = ggml_nrows(src1); + const int nc = src1->ne[0]; + const int nr = ggml_nrows(src1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_vec_silu_backward_f16(nc, + (ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])), + (ggml_fp16_t *) ((char *) src1->data + i1*(src1->nb[1])), + (ggml_fp16_t *) ((char *) grad->data + i1*(grad->nb[1]))); + + #ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float v = GGML_CPU_FP16_TO_FP32(x); + GGML_UNUSED(v); + assert(!isnan(v)); + assert(!isinf(v)); + } + #endif + } +} + +void ggml_compute_forward_silu_back( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_silu_back_f32(params, dst); + } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_silu_back_f16(params, dst); + } break; + default: + { + GGML_ABORT("fatal error"); + } + } +} + +// ggml_compute_forward_reglu + +static void ggml_compute_forward_reglu_f32( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? 
src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * src0_p = (float *) (src0_d + i1*src0_o); + float * src1_p = (float *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_reglu_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + GGML_UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_reglu_f16( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? 
src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o); + ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_reglu_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float v = GGML_FP16_TO_FP32(x); + GGML_UNUSED(v); + assert(!isnan(v)); + assert(!isinf(v)); + } +#endif + } +} + +static void ggml_compute_forward_reglu( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_reglu_f32(params, dst); + } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_reglu_f16(params, dst); + } break; + default: + { + GGML_ABORT("fatal error"); + } + } +} + +// ggml_compute_forward_geglu + +static void ggml_compute_forward_geglu_f32( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? 
src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * src0_p = (float *) (src0_d + i1*src0_o); + float * src1_p = (float *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_geglu_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + GGML_UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_geglu_f16( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? 
src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o); + ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_geglu_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float v = GGML_FP16_TO_FP32(x); + GGML_UNUSED(v); + assert(!isnan(v)); + assert(!isinf(v)); + } +#endif + } +} + +static void ggml_compute_forward_geglu( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_geglu_f32(params, dst); + } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_geglu_f16(params, dst); + } break; + default: + { + GGML_ABORT("fatal error"); + } + } +} + +// ggml_compute_forward_swiglu + +static void ggml_compute_forward_swiglu_f32( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? 
src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * src0_p = (float *) (src0_d + i1*src0_o); + float * src1_p = (float *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_swiglu_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + GGML_UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_swiglu_f16( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? 
src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o); + ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_swiglu_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float v = GGML_FP16_TO_FP32(x); + GGML_UNUSED(v); + assert(!isnan(v)); + assert(!isinf(v)); + } +#endif + } +} + +static void ggml_compute_forward_swiglu( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_swiglu_f32(params, dst); + } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_swiglu_f16(params, dst); + } break; + default: + { + GGML_ABORT("fatal error"); + } + } +} + +// ggml_compute_forward_geglu_erf + +static void ggml_compute_forward_geglu_erf_f32( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? 
src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * src0_p = (float *) (src0_d + i1*src0_o); + float * src1_p = (float *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_geglu_erf_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + GGML_UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_geglu_erf_f16( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? 
src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o); + ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_geglu_erf_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const float v = GGML_FP16_TO_FP32(x); + GGML_UNUSED(v); + assert(!isnan(v)); + assert(!isinf(v)); + } +#endif + } +} + +static void ggml_compute_forward_geglu_erf( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_geglu_erf_f32(params, dst); + } break; + case GGML_TYPE_F16: + { + ggml_compute_forward_geglu_erf_f16(params, dst); + } break; + default: + { + GGML_ABORT("fatal error"); + } + } +} + +// ggml_compute_forward_geglu_quick + +static void ggml_compute_forward_geglu_quick_f32( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? 
src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); + + // rows per thread + const int dr = (nr + nth - 1)/nth; + + // row range for this thread + const int ir0 = dr*ith; + const int ir1 = MIN(ir0 + dr, nr); + + for (int i1 = ir0; i1 < ir1; i1++) { + float * src0_p = (float *) (src0_d + i1*src0_o); + float * src1_p = (float *) (src1_d + i1*src1_o); + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_geglu_quick_f32(nc, (float *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG + for (int k = 0; k < nc; k++) { + const float x = ((float *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + GGML_UNUSED(x); + assert(!isnan(x)); + assert(!isinf(x)); + } +#endif + } +} + +static void ggml_compute_forward_geglu_quick_f16( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + char * src0_d = (char *) src0->data; + char * src1_d = (char *) (src1 ? src1->data : src0->data); + const size_t src0_o = src0->nb[1]; + const size_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(ggml_is_contiguous_1(dst)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src0->type == src1->type); + } + + const int ith = params->ith; + const int nth = params->nth; + + const int nc = src1 ? 
src0->ne[0] : src0->ne[0] / 2; + const int nr = ggml_nrows(src0); + + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == nr); + + const int32_t swapped = ggml_get_op_params_i32(dst, 1); // rows per thread const int dr = (nr + nth - 1)/nth; @@ -3155,24 +3856,29 @@ static void ggml_compute_forward_silu_back_f16( const int ir1 = MIN(ir0 + dr, nr); for (int i1 = ir0; i1 < ir1; i1++) { - ggml_vec_silu_backward_f16(nc, - (ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])), - (ggml_fp16_t *) ((char *) src1->data + i1*(src1->nb[1])), - (ggml_fp16_t *) ((char *) grad->data + i1*(grad->nb[1]))); + ggml_fp16_t * src0_p = (ggml_fp16_t *) (src0_d + i1*src0_o); + ggml_fp16_t * src1_p = (ggml_fp16_t *) (src1_d + i1*src1_o); - #ifndef NDEBUG + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + ggml_vec_geglu_quick_f16(nc, (ggml_fp16_t *) ((char *) dst->data + i1*(dst->nb[1])), src0_p, src1_p); + +#ifndef NDEBUG for (int k = 0; k < nc; k++) { - const float x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; + const ggml_fp16_t x = ((ggml_fp16_t *) ((char *) dst->data + i1*( dst->nb[1])))[k]; const float v = GGML_FP16_TO_FP32(x); GGML_UNUSED(v); assert(!isnan(v)); assert(!isinf(v)); } - #endif +#endif } } -void ggml_compute_forward_silu_back( +static void ggml_compute_forward_geglu_quick( const ggml_compute_params * params, ggml_tensor * dst) { @@ -3181,11 +3887,11 @@ void ggml_compute_forward_silu_back( switch (src0->type) { case GGML_TYPE_F32: { - ggml_compute_forward_silu_back_f32(params, dst); + ggml_compute_forward_geglu_quick_f32(params, dst); } break; case GGML_TYPE_F16: { - ggml_compute_forward_silu_back_f16(params, dst); + ggml_compute_forward_geglu_quick_f16(params, dst); } break; default: { @@ -3309,6 +4015,9 @@ static void ggml_compute_forward_rms_norm_f32( const float scale = 1.0f/sqrtf(mean + eps); + // if you hit this, likely you got an inf somewhere earlier + assert(scale > 0.0f); + ggml_vec_scale_f32(ne00, 
y, scale); } } @@ -3937,9 +4646,11 @@ static void ggml_compute_forward_scale_f32( GGML_ASSERT(ggml_is_contiguous(dst)); GGML_ASSERT(ggml_are_same_shape(src0, dst)); - // scale factor - float v; - memcpy(&v, dst->op_params, sizeof(float)); + float s; // scale factor + float b; // bias + + memcpy(&s, (float *) dst->op_params + 0, sizeof(float)); + memcpy(&b, (float *) dst->op_params + 1, sizeof(float)); const int ith = params->ith; const int nth = params->nth; @@ -3958,12 +4669,22 @@ static void ggml_compute_forward_scale_f32( const size_t nb1 = dst->nb[1]; - for (int i1 = ir0; i1 < ir1; i1++) { - if (dst->data != src0->data) { - // src0 is same shape as dst => same indices - memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float)); + if (b == 0.0f) { + for (int i1 = ir0; i1 < ir1; i1++) { + if (dst->data != src0->data) { + // src0 is same shape as dst => same indices + // TODO: add x parameter to ggml_vec_scale_f32 and remove this memcpy + memcpy((char *)dst->data + i1*nb1, (char *)src0->data + i1*nb01, nc * sizeof(float)); + } + ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), s); + } + } else { + for (int i1 = ir0; i1 < ir1; i1++) { + ggml_vec_mad1_f32(nc, + (float *) ((char *) dst->data + i1*nb1), + (float *) ((char *) src0->data + i1*nb1), + s, b); } - ggml_vec_scale_f32(nc, (float *) ((char *) dst->data + i1*nb1), v); } } @@ -4470,6 +5191,74 @@ void ggml_compute_forward_get_rows( //} } +static void ggml_compute_forward_set_rows_f32( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + + GGML_TENSOR_BINARY_OP_LOCALS + + const int64_t nc = ne00; + const int64_t nr = ne01; + + assert(ne0 == nc); + assert(ne2 == ne02); + assert(ne3 == ne03); + assert(src0->type == GGML_TYPE_F32); + assert(ne02 % ne11 == 0); + assert(ne03 % ne12 == 0); + + const int ith = params->ith; + const int nth = params->nth; + + // rows per thread + const 
int64_t dr = (nr + nth - 1)/nth; + + // row range for this thread + const int64_t ir0 = dr*ith; + const int64_t ir1 = std::min(ir0 + dr, nr); + + ggml_from_float_t const from_float = ggml_get_type_traits_cpu(dst->type)->from_float; + + for (int64_t i03 = 0; i03 < ne03; ++i03) { + for (int64_t i02 = 0; i02 < ne02; ++i02) { + for (int64_t i = ir0; i < ir1; ++i) { + const int64_t i12 = i03%ne12; + const int64_t i11 = i02%ne11; + const int64_t i10 = i; + + const int64_t i1 = *(int64_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12); + + GGML_ASSERT(i1 >= 0 && i1 < ne1); + + from_float( + (const float *) ((char *) src0->data + i*nb01 + i02*nb02 + i03*nb03), + ((char *) dst->data + i1*nb1 + i02*nb2 + i03*nb3), nc); + } + } + } +} + +void ggml_compute_forward_set_rows( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_set_rows_f32(params, dst); + } break; + default: + { + GGML_ABORT("src0->type = %d (%s) not supported", src0->type, ggml_type_name(src0->type)); + } + } +} + // ggml_compute_forward_get_rows_back static void ggml_compute_forward_get_rows_back_f32_f16( @@ -4500,7 +5289,7 @@ static void ggml_compute_forward_get_rows_back_f32_f16( for (int j = 0; j < nc; ++j) { ggml_fp16_t v = ((ggml_fp16_t *) ((char *) src0->data + i*src0->nb[1]))[j]; - ((float *) ((char *) dst->data + r*dst->nb[1]))[j] += GGML_FP16_TO_FP32(v); + ((float *) ((char *) dst->data + r*dst->nb[1]))[j] += GGML_CPU_FP16_TO_FP32(v); } } } @@ -4744,14 +5533,17 @@ static void ggml_compute_forward_soft_max_f32( memcpy(&scale, (float *) dst->op_params + 0, sizeof(float)); memcpy(&max_bias, (float *) dst->op_params + 1, sizeof(float)); - // TODO: handle transposed/permuted matrices - const int ith = params->ith; const int nth = params->nth; GGML_TENSOR_UNARY_OP_LOCALS - //const int64_t ne11 = src1 ? src1->ne[1] : 1; + const int64_t nb11 = src1 ? 
src1->nb[1] : 1; + const int64_t nb12 = src1 ? src1->nb[2] : 1; + const int64_t nb13 = src1 ? src1->nb[3] : 1; + + const int64_t ne12 = src1 ? src1->ne[2] : 1; + const int64_t ne13 = src1 ? src1->ne[3] : 1; // TODO: is this supposed to be ceil instead of floor? // https://huggingface.co/mosaicml/mpt-7b/blob/main/attention.py#L370 @@ -4761,68 +5553,66 @@ static void ggml_compute_forward_soft_max_f32( const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); - const int nc = src0->ne[0]; - const int nr = ggml_nrows(src0); - - // rows per thread - const int dr = (nr + nth - 1)/nth; - - // row range for this thread - const int ir0 = dr*ith; - const int ir1 = MIN(ir0 + dr, nr); - - float * wp = (float *) params->wdata + (nc + CACHE_LINE_SIZE_F32) * ith; + float * wp = (float *) params->wdata + (ne00 + CACHE_LINE_SIZE_F32) * ith; const bool use_f16 = (src1 && src1->type == GGML_TYPE_F16); - for (int i1 = ir0; i1 < ir1; i1++) { - // ALiBi - const uint32_t h = (i1/ne01)%ne02; // head - const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1) : 1.0f; - - float * sp = (float *)((char *) src0->data + i1*src0->nb[1]); - float * dp = (float *)((char *) dst->data + i1*dst->nb[1]); - - // broadcast the mask across rows - ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data) + (i1%ne01)*ne00 : NULL; - float * mp_f32 = src1 ? 
(float *)((char *) src1->data) + (i1%ne01)*ne00 : NULL; - - ggml_vec_cpy_f32 (nc, wp, sp); - ggml_vec_scale_f32(nc, wp, scale); - if (mp_f32) { - if (use_f16) { - for (int i = 0; i < nc; ++i) { - wp[i] += slope*GGML_FP16_TO_FP32(mp_f16[i]); - } - } else { - for (int i = 0; i < nc; ++i) { - wp[i] += slope*mp_f32[i]; + for (int64_t i03 = 0; i03 < ne03; i03++) { + for (int64_t i02 = 0; i02 < ne02; i02++) { + for (int64_t i01 = ith; i01 < ne01; i01 += nth) { + const int64_t i11 = i01; + const int64_t i12 = i02%ne12; + const int64_t i13 = i03%ne13; + + // ALiBi + const uint32_t h = i02; // head + const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1) : 1.0f; + + float * sp = (float *)((char *) src0->data + i01*nb01 + i02*nb02 + i03*nb03); + float * dp = (float *)((char *) dst->data + i01*nb1 + i02*nb2 + i03*nb3); + + // broadcast the mask across rows + ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13) : NULL; + float * mp_f32 = src1 ? 
(float *)((char *) src1->data + i11*nb11 + i12*nb12 + i13*nb13) : NULL; + + ggml_vec_cpy_f32 (ne00, wp, sp); + ggml_vec_scale_f32(ne00, wp, scale); + if (mp_f32) { + if (use_f16) { + for (int i = 0; i < ne00; ++i) { + wp[i] += slope*GGML_CPU_FP16_TO_FP32(mp_f16[i]); + } + } else { + for (int i = 0; i < ne00; ++i) { + wp[i] += slope*mp_f32[i]; + } + } } - } - } #ifndef NDEBUG - for (int i = 0; i < nc; ++i) { - //printf("p[%d] = %f\n", i, p[i]); - assert(!isnan(wp[i])); - } + for (int i = 0; i < ne00; ++i) { + //printf("p[%d] = %f\n", i, p[i]); + assert(!isnan(wp[i])); + } #endif - float max = -INFINITY; - ggml_vec_max_f32(nc, &max, wp); + float max = -INFINITY; + ggml_vec_max_f32(ne00, &max, wp); - ggml_float sum = ggml_vec_soft_max_f32(nc, dp, wp, max); - assert(sum > 0.0); + ggml_float sum = ggml_vec_soft_max_f32(ne00, dp, wp, max); + assert(sum > 0.0); - sum = 1.0/sum; - ggml_vec_scale_f32(nc, dp, sum); + sum = 1.0/sum; + ggml_vec_scale_f32(ne00, dp, sum); #ifndef NDEBUG - for (int i = 0; i < nc; ++i) { - assert(!isnan(dp[i])); - assert(!isinf(dp[i])); - } + for (int i = 0; i < ne00; ++i) { + assert(!isnan(dp[i])); + assert(!isinf(dp[i])); + } #endif + } + } } } @@ -5018,8 +5808,8 @@ static void ggml_compute_forward_clamp_f16( ggml_fp16_t * src0_ptr = (ggml_fp16_t *) ((char *) src0->data + j*nb01); for (int i = 0; i < nc; i++) { - float v = GGML_FP16_TO_FP32(src0_ptr[i]); - dst_ptr[i] = GGML_FP32_TO_FP16(MAX(MIN(v, max), min)); + float v = GGML_CPU_FP16_TO_FP32(src0_ptr[i]); + dst_ptr[i] = GGML_CPU_FP32_TO_FP16(MAX(MIN(v, max), min)); } } } @@ -5476,11 +6266,11 @@ static void ggml_compute_forward_rope_f16( const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0); - const float x0 = GGML_FP16_TO_FP32(src[0]); - const float x1 = GGML_FP16_TO_FP32(src[n_dims]); + const float x0 = GGML_CPU_FP16_TO_FP32(src[0]); + const 
float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims]); - dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); - dst_data[n_dims] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); + dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); + dst_data[n_dims] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); } } else { for (int64_t i0 = 0; i0 < n_dims; i0 += 2) { @@ -5492,11 +6282,11 @@ static void ggml_compute_forward_rope_f16( const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0); - const float x0 = GGML_FP16_TO_FP32(src[0]); - const float x1 = GGML_FP16_TO_FP32(src[n_dims/2]); + const float x0 = GGML_CPU_FP16_TO_FP32(src[0]); + const float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims/2]); - dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); - dst_data[n_dims/2] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); + dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); + dst_data[n_dims/2] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); } } } else { @@ -5507,11 +6297,11 @@ static void ggml_compute_forward_rope_f16( const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + i0*nb00); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); - const float x0 = GGML_FP16_TO_FP32(src[0]); - const float x1 = GGML_FP16_TO_FP32(src[1]); + const float x0 = GGML_CPU_FP16_TO_FP32(src[0]); + const float x1 = GGML_CPU_FP16_TO_FP32(src[1]); - dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); - dst_data[1] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); + dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); + dst_data[1] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); } } @@ -5525,11 +6315,11 @@ static void ggml_compute_forward_rope_f16( const ggml_fp16_t * const src = (ggml_fp16_t 
*)((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01 + ic*nb00); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + ic*nb0); - const float x0 = GGML_FP16_TO_FP32(src[0]); - const float x1 = GGML_FP16_TO_FP32(src[n_dims]); + const float x0 = GGML_CPU_FP16_TO_FP32(src[0]); + const float x1 = GGML_CPU_FP16_TO_FP32(src[n_dims]); - dst_data[0] = GGML_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); - dst_data[n_dims] = GGML_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); + dst_data[0] = GGML_CPU_FP32_TO_FP16(x0*cos_theta - x1*sin_theta); + dst_data[n_dims] = GGML_CPU_FP32_TO_FP16(x0*sin_theta + x1*cos_theta); } } else { for (int64_t i0 = n_dims; i0 < ne0; i0 += 2) { @@ -5640,7 +6430,7 @@ static void ggml_compute_forward_conv_transpose_1d_f16_f32( for (int64_t i11 = 0; i11 < ne11; i11++) { const float * const src = (float *)((char *) src1->data + i11*nb11); for (int64_t i10 = 0; i10 < ne10; i10++) { - dst_data[i10*ne11 + i11] = GGML_FP32_TO_FP16(src[i10]); + dst_data[i10*ne11 + i11] = GGML_CPU_FP32_TO_FP16(src[i10]); } } } @@ -5933,7 +6723,7 @@ static void ggml_compute_forward_im2col_f16( if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) { dst_data[iic*(KH*KW) + ikh*KW + ikw] = 0; } else { - dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_FP32_TO_FP16(src_data[iih*IW + iiw]); + dst_data[iic*(KH*KW) + ikh*KW + ikw] = GGML_CPU_FP32_TO_FP16(src_data[iih*IW + iiw]); } } } @@ -6058,6 +6848,186 @@ void ggml_compute_forward_im2col_back_f32( } } +static void ggml_call_mul_mat(ggml_type type, const ggml_compute_params * params, int64_t m, int64_t n, int64_t k, + void * a, void * b, float * c) { + const ggml_type_traits * traits = ggml_get_type_traits(type); + struct ggml_tensor src1 = {}; + src1.type = type; + src1.ne[0] = k; + src1.ne[1] = m; + src1.ne[2] = 1; + src1.ne[3] = 1; + src1.nb[0] = traits->type_size; + src1.nb[1] = k * traits->type_size; + src1.nb[2] = src1.nb[1]; + src1.nb[3] = src1.nb[2]; + src1.data = a; + + struct ggml_tensor src0 = {}; + 
src0.type = type; + src0.ne[0] = k; + src0.ne[1] = n; + src0.ne[2] = 1; + src0.ne[3] = 1; + src0.nb[0] = traits->type_size; + src0.nb[1] = k * traits->type_size; + src0.nb[2] = src0.nb[1]; + src0.nb[3] = src0.nb[2]; + src0.data = b; + + struct ggml_tensor dst = {}; + dst.ne[0] = n; + dst.ne[1] = m; + dst.ne[2] = 1; + dst.ne[3] = 1; + dst.nb[0] = sizeof(float); + dst.nb[1] = n * sizeof(float); + dst.nb[2] = dst.nb[1]; + dst.nb[3] = dst.nb[2]; + dst.data = c; + dst.src[0] = &src0; + dst.src[1] = &src1; + + ggml_compute_forward_mul_mat(params, &dst); +} + +// ggml_compute_forward_conv_2d + +static void ggml_compute_forward_conv_2d_impl(const ggml_compute_params * params, + const ggml_tensor * kernel, // [KW, KH, IC, OC] + const ggml_tensor * src, // [W, H, C, N] + ggml_tensor * dst, // [OW, OH, OC, N] + ggml_type kernel_type) { + + GGML_ASSERT(ggml_is_contiguous(kernel)); + GGML_ASSERT(kernel_type == GGML_TYPE_F16 || kernel_type == GGML_TYPE_F32); + GGML_ASSERT(kernel->type == kernel_type); + + const ggml_type_traits * traits = ggml_get_type_traits(kernel_type); + + const int32_t stride_x = dst->op_params[0]; + const int32_t stride_y = dst->op_params[1]; + const int32_t pad_x = dst->op_params[2]; + const int32_t pad_y = dst->op_params[3]; + const int32_t dilation_x = dst->op_params[4]; + const int32_t dilation_y = dst->op_params[5]; + + const int64_t c_in = src->ne[2]; + const int64_t c_out = kernel->ne[3]; + GGML_ASSERT(c_in == kernel->ne[2]); + + const int64_t src_w = src->ne[0]; + const int64_t src_h = src->ne[1]; + const int64_t knl_w = kernel->ne[0]; + const int64_t knl_h = kernel->ne[1]; + const int64_t dst_w = dst->ne[0]; + const int64_t dst_h = dst->ne[1]; + + const float * src_data = (float *) src->data; + void * knl_data = kernel->data; + float * dst_data = (float *) dst->data; + + const int64_t knl_n = knl_w * knl_h * c_in; + const int64_t patch_total = dst->ne[3] * dst_w * dst_h; + + const int64_t space_per_patch = knl_n * traits->type_size + c_out * 
sizeof(float); + const int64_t batch_size = params->wsize / space_per_patch; + const int64_t patches_per_batch = batch_size > 8 ? (batch_size / 8) * 8 : batch_size; + const int64_t batch_n = (patch_total + patches_per_batch - 1) / patches_per_batch; + + GGML_ASSERT(patches_per_batch > 0 && batch_size >= 1); + + void * tmp = params->wdata; + + for (int64_t batch_i = 0; batch_i < batch_n; ++batch_i) { + + const int64_t patch_start_batch = batch_i * patches_per_batch; + const int64_t patch_end_batch = std::min(patch_start_batch + patches_per_batch, + patch_total); + const int64_t patch_n = patch_end_batch - patch_start_batch; + + const int64_t patch_per_thread = (patch_n + params->nth - 1) / params->nth; + const int64_t patch_start = patch_start_batch + params->ith * patch_per_thread; + const int64_t patch_end = std::min(patch_start + patch_per_thread, patch_end_batch); + + //im2col for a patch + for (int64_t p = patch_start; p < patch_end; ++p) { + const int64_t batch_n = p / (dst_w * dst_h); + const int64_t src_x = (p / dst_w) % dst_h; + const int64_t src_y = p % dst_w; + + const float * src_base = (const float *)((const char *)src_data + batch_n * src->nb[3]); + char * dst_row = (char *) tmp + (p % patches_per_batch) * knl_n * traits->type_size; + + for (int64_t ic = 0; ic < c_in; ++ic) { + for (int64_t ky = 0; ky < knl_h; ++ky) { + for (int64_t kx = 0; kx < knl_w; ++kx) { + const int64_t sy = src_x * stride_y + ky * dilation_y - pad_y; + const int64_t sx = src_y * stride_x + kx * dilation_x - pad_x; + + int64_t dst_idx = ic * (knl_h * knl_w) + ky * knl_w + kx; + + float src_val; + if (sy < 0 || sy >= src_h || sx < 0 || sx >= src_w) { + src_val = 0.0f; + } else { + const float * src_ptr = (const float *)((const char *)src_base + sx * src->nb[0] + sy * src->nb[1] + ic * src->nb[2]); + src_val = *src_ptr; + } + + char * element_ptr = dst_row + dst_idx * traits->type_size; + if (kernel_type == GGML_TYPE_F32) { + *(float *) element_ptr = src_val; + } else if 
(kernel_type == GGML_TYPE_F16) { + *(ggml_fp16_t *) element_ptr = GGML_CPU_FP32_TO_FP16(src_val); + } + } + } + } + } // patches handled by this thread + + ggml_barrier(params->threadpool); + + float * gemm_output = (float *) ((char *) tmp + patches_per_batch * knl_n * traits->type_size); + + GGML_ASSERT(gemm_output + patch_n * c_out <= (float*)tmp + params->wsize); + + // GEMM: patches[patch_n, knl_n] × kernel[knl_n, c_out] = output[patch_n, c_out] + ggml_call_mul_mat(kernel_type, params, patch_n, c_out, knl_n, tmp, knl_data, gemm_output); + + ggml_barrier(params->threadpool); + + + //permute back [OC, N, OH, OW] to [N, OC, OH, OW] + const int64_t permute_per_thread = (patch_n + params->nth - 1) / params->nth; + const int64_t permute_start = params->ith * permute_per_thread; + const int64_t permute_end = std::min(permute_start + permute_per_thread, patch_n); + + for (int64_t i = permute_start; i < permute_end; ++i) { + const int64_t p = patch_start_batch + i; + const int64_t batch_n = p / (dst_w * dst_h); + const int64_t dst_y = (p / dst_w) % dst_h; + const int64_t dst_x = p % dst_w; + + for (int64_t oc = 0; oc < c_out; ++oc) { + const float value = gemm_output[i * c_out + oc]; + float * dst_ptr = (float *)((char *)dst_data + dst_x * dst->nb[0] + dst_y * dst->nb[1] + oc * dst->nb[2] + batch_n * dst->nb[3]); + *dst_ptr = value; + } + } + } +} + +void ggml_compute_forward_conv_2d( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + + ggml_compute_forward_conv_2d_impl(params, src0, src1, dst, src0->type); +} + // ggml_compute_forward_conv_transpose_2d void ggml_compute_forward_conv_transpose_2d( @@ -6109,7 +7079,7 @@ void ggml_compute_forward_conv_transpose_2d( const float * const src = (float *)((char *) src1->data + i12*nb12 + i11*nb11); ggml_fp16_t * dst_data = wdata + i11*ne10*ne12; for (int i10 = 0; i10 < ne10; i10++) { - dst_data[i10*ne12 + i12] = 
GGML_FP32_TO_FP16(src[i10]); + dst_data[i10*ne12 + i12] = GGML_CPU_FP32_TO_FP16(src[i10]); } } } @@ -6358,7 +7328,7 @@ static void ggml_compute_forward_pool_1d_sk_p0( case GGML_OP_POOL_COUNT: GGML_ABORT("fatal error"); } for (int ki = 0; ki < k; ++ki) { - const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]); + const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]); switch (op) { case GGML_OP_POOL_AVG: drow[i] += srow_j; break; case GGML_OP_POOL_MAX: if (srow_j > drow[i]) drow[i] = srow_j; break; @@ -6450,7 +7420,7 @@ void ggml_compute_forward_pool_2d( for (int kx = 0; kx < k0; ++kx) { int j = ix + kx; if (j < 0 || j >= src->ne[0]) continue; - const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]); + const float srow_j = (src->type == GGML_TYPE_F32) ? ((const float*)srow)[j] : GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t*)srow)[j]); switch (op) { case GGML_OP_POOL_AVG: *out += srow_j; break; case GGML_OP_POOL_MAX: if (srow_j > *out) *out = srow_j; break; @@ -6538,7 +7508,7 @@ void ggml_compute_forward_pool_2d_back( } const float val = dst->type == GGML_TYPE_F32 ? 
- ((const float *) drowf)[j] : GGML_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]); + ((const float *) drowf)[j] : GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t *) drowf)[j]); if (val <= maxval) { continue; } @@ -6558,7 +7528,7 @@ void ggml_compute_forward_pool_2d_back( if (dst->type == GGML_TYPE_F32) { ((float *) drow)[j] += grad0; } else { - ((ggml_fp16_t *) drow)[j] = GGML_FP32_TO_FP16(grad0 + GGML_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j])); + ((ggml_fp16_t *) drow)[j] = GGML_CPU_FP32_TO_FP16(grad0 + GGML_CPU_FP16_TO_FP32(((const ggml_fp16_t *) drow)[j])); } } else if (op == GGML_OP_POOL_AVG) { const float grad = grad0 / ka; @@ -6577,7 +7547,7 @@ void ggml_compute_forward_pool_2d_back( if (dst->type == GGML_TYPE_F32) { ((float *) drow)[j] += grad; } else { - ((ggml_fp16_t *) drow)[j] += GGML_FP32_TO_FP16(grad); + ((ggml_fp16_t *) drow)[j] += GGML_CPU_FP32_TO_FP16(grad); } } } @@ -6608,12 +7578,13 @@ static void ggml_compute_forward_upscale_f32( GGML_TENSOR_UNARY_OP_LOCALS - const float sf0 = (float)ne0/src0->ne[0]; - const float sf1 = (float)ne1/src0->ne[1]; - const float sf2 = (float)ne2/src0->ne[2]; - const float sf3 = (float)ne3/src0->ne[3]; + float sf0 = (float)ne0/src0->ne[0]; + float sf1 = (float)ne1/src0->ne[1]; + float sf2 = (float)ne2/src0->ne[2]; + float sf3 = (float)ne3/src0->ne[3]; - const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0); + const int32_t mode_flags = ggml_get_op_params_i32(dst, 0); + const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF); if (mode == GGML_SCALE_MODE_NEAREST) { for (int64_t i3 = 0; i3 < ne3; i3++) { @@ -6634,8 +7605,12 @@ static void ggml_compute_forward_upscale_f32( } } } else if (mode == GGML_SCALE_MODE_BILINEAR) { - // setting a pixel offset of 0 would replicate the behavior of pytorch interpolate with align_corners=True - const float pixel_offset = 0.5f; + float pixel_offset = 0.5f; + if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { + pixel_offset = 0.0f; + sf0 = (float)(ne0 - 
1) / (src0->ne[0] - 1); + sf1 = (float)(ne1 - 1) / (src0->ne[1] - 1); + } for (int64_t i3 = 0; i3 < ne3; i3++) { const int64_t i03 = i3 / sf3; @@ -6793,6 +7768,73 @@ void ggml_compute_forward_pad_reflect_1d( } } +// ggml_compute_forward_roll + +static int64_t ggml_wrap_index(int64_t i, int64_t ne) { + if (i < 0) { + return i + ne; + } else if (i >= ne) { + return i - ne; + } + return i; +} + +static void ggml_compute_forward_roll_f32( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + const float * src_data = (const float *) src0->data; + float * dst_data = (float *) dst->data; + + GGML_TENSOR_UNARY_OP_LOCALS + + const int s0 = ggml_get_op_params_i32(dst, 0); + const int s1 = ggml_get_op_params_i32(dst, 1); + const int s2 = ggml_get_op_params_i32(dst, 2); + const int s3 = ggml_get_op_params_i32(dst, 3); + + const int64_t total = ne1 * ne2 * ne3; + const int64_t per_thread = (total + params->nth) / params->nth; + const int64_t start = params->ith * per_thread; + const int64_t end = std::min(start + per_thread, total); + + for (int64_t i = start; i < end; ++i) { + const int64_t i1 = i % ne1; + const int64_t i2 = (i / ne1) % ne2; + const int64_t i3 = i / (ne2 * ne1); + float * dst_row = dst_data + (i3*nb3 + i2*nb2 + i1*nb1) / sizeof(float); + + const int64_t i01 = ggml_wrap_index(i1 - s1, ne01); + const int64_t i02 = ggml_wrap_index(i2 - s2, ne02); + const int64_t i03 = ggml_wrap_index(i3 - s3, ne03); + const float * src_row = src_data + (i03*nb03 + i02*nb02 + i01*nb01) / sizeof(float); + + const int64_t s = ggml_wrap_index(-s0, ne00); + const int64_t n = ne00 - s; + ggml_vec_cpy_f32(n, dst_row, src_row + s); + ggml_vec_cpy_f32(s, dst_row + n, src_row); + } +} + +void ggml_compute_forward_roll( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_tensor * src0 = dst->src[0]; + + switch (src0->type) { + case GGML_TYPE_F32: + { + ggml_compute_forward_roll_f32(params, dst); + } break; + 
default: + { + GGML_ABORT("fatal error"); + } + } +} + // ggml_compute_forward_arange static void ggml_compute_forward_arange_f32( @@ -7026,7 +8068,7 @@ static void ggml_compute_forward_flash_attn_ext_f16( const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); - ggml_type const k_vec_dot_type = ggml_get_type_traits_cpu(k->type)->vec_dot_type; + ggml_type const k_vec_dot_type = ggml_get_type_traits_cpu(k->type)->vec_dot_type; ggml_from_float_t const q_to_vec_dot = ggml_get_type_traits_cpu(k_vec_dot_type)->from_float; ggml_vec_dot_t const kq_vec_dot = ggml_get_type_traits_cpu(k->type)->vec_dot; ggml_to_float_t const v_to_float = ggml_get_type_traits(v->type)->to_float; @@ -7058,7 +8100,7 @@ static void ggml_compute_forward_flash_attn_ext_f16( memset(VKQ32, 0, DV*sizeof(float)); } - const ggml_fp16_t * mp = mask ? (ggml_fp16_t *)((char *) mask->data + iq1*mask->nb[1]) : NULL; + const ggml_fp16_t * mp = mask ? (ggml_fp16_t *)((char *) mask->data + iq1*mask->nb[1] + (iq2%mask->ne[2])*mask->nb[2] + (iq3%mask->ne[3])*mask->nb[3]) : NULL; // k indices const int ik3 = iq3 / rk3; @@ -7075,7 +8117,7 @@ static void ggml_compute_forward_flash_attn_ext_f16( // loop over n_kv and n_head_kv // ref: https://arxiv.org/pdf/2112.05682.pdf for (int64_t ic = 0; ic < nek1; ++ic) { - const float mv = mp ? slope*GGML_FP16_TO_FP32(mp[ic]) : 0.0f; + const float mv = mp ? 
slope*GGML_CPU_FP16_TO_FP32(mp[ic]) : 0.0f; if (mv == -INFINITY) { continue; } @@ -7143,7 +8185,7 @@ static void ggml_compute_forward_flash_attn_ext_f16( if (v->type == GGML_TYPE_F16) { for (int64_t d = 0; d < DV; ++d) { - VKQ32[d] = GGML_FP16_TO_FP32(VKQ16[d]); + VKQ32[d] = GGML_CPU_FP16_TO_FP32(VKQ16[d]); } } @@ -7596,120 +8638,210 @@ void ggml_compute_forward_ssm_conv( static void ggml_compute_forward_ssm_scan_f32( const ggml_compute_params * params, ggml_tensor * dst) { - const ggml_tensor * src0 = dst->src[0]; // s - const ggml_tensor * src1 = dst->src[1]; // x - const ggml_tensor * src2 = dst->src[2]; // dt - const ggml_tensor * src3 = dst->src[3]; // A - const ggml_tensor * src4 = dst->src[4]; // B - const ggml_tensor * src5 = dst->src[5]; // C + const ggml_tensor * src0 = dst->src[0]; // s {d_state, dim, n_head, n_seqs+} + const ggml_tensor * src1 = dst->src[1]; // x {dim, n_head, n_seq_tokens, n_seqs} + const ggml_tensor * src2 = dst->src[2]; // dt {n_head, n_seq_tokens, n_seqs} + const ggml_tensor * src3 = dst->src[3]; // A {d_state, n_head} or {1, n_head} + const ggml_tensor * src4 = dst->src[4]; // B {d_state, n_group, n_seq_tokens, n_seqs} + const ggml_tensor * src5 = dst->src[5]; // C {d_state, n_group, n_seq_tokens, n_seqs} + const ggml_tensor * src6 = dst->src[6]; // ids {n_seqs} const int ith = params->ith; const int nth = params->nth; - const int64_t nc = src0->ne[0]; // d_state - const int64_t nr = src0->ne[1]; // d_inner - const int64_t n_t = src1->ne[1]; // number of tokens per sequence - const int64_t n_s = src0->ne[2]; // number of sequences in the batch + const int64_t nc = src0->ne[0]; // d_state + const int64_t nr = src0->ne[1]; // dim + const int64_t nh = src1->ne[1]; // n_head + const int64_t ng = src4->ne[1]; + const int64_t nt = src1->ne[2]; // number of tokens per sequence + const int64_t ns = src1->ne[3]; // number of sequences in the batch + + // can't use ggml_nbytes because src1 is not necessarily contiguous + const int64_t s_off 
= ggml_nelements(src1) * ggml_element_size(src1); - GGML_ASSERT(ggml_nelements(src1) + ggml_nelements(src0) == ggml_nelements(dst)); + GGML_ASSERT(ggml_nelements(src1) + nc*nr*nh*ns == ggml_nelements(dst)); GGML_ASSERT(src0->nb[0] == sizeof(float)); GGML_ASSERT(src1->nb[0] == sizeof(float)); GGML_ASSERT(src2->nb[0] == sizeof(float)); GGML_ASSERT(src3->nb[0] == sizeof(float)); GGML_ASSERT(src4->nb[0] == sizeof(float)); GGML_ASSERT(src5->nb[0] == sizeof(float)); - // required for the dot product between s and C - GGML_ASSERT(src0->nb[1] == src0->ne[0]*sizeof(float)); - // required for per-sequence offsets for states - GGML_ASSERT(src0->nb[2] == src0->ne[0]*src0->ne[1]*sizeof(float)); - // required to get correct offset for state destination (i.e. src1->nb[3]) - GGML_ASSERT(src1->nb[3] == src1->ne[0]*src1->ne[1]*src1->ne[2]*sizeof(float)); + GGML_ASSERT(src6->nb[0] == sizeof(int32_t)); + // allows optimizing the modulo since n_group should be a power of 2 + GGML_ASSERT((ng & -ng) == ng); - // rows per thread - const int dr = (nr + nth - 1)/nth; + // heads per thread + const int dh = (nh + nth - 1)/nth; - // row range for this thread - const int ir0 = dr*ith; - const int ir1 = MIN(ir0 + dr, nr); - const int ir = ir1 - ir0; + // head range for this thread + const int ih0 = dh*ith; + const int ih1 = MIN(ih0 + dh, nh); + + const int32_t * ids = (const int32_t *) src6->data; - #ifdef __ARM_FEATURE_SVE - for (int i3 = 0; i3 < n_s; ++i3) { - for (int i2 = 0; i2 < n_t; ++i2) { - const float * s0 = (const float *) ((const char *) src0->data + ir0*(src0->nb[1]) + i3*(src0->nb[2])); // {d_state, d_inner, n_s} - const float * x = (const float *) ((const char *) src1->data + ir0*(src1->nb[0]) + i2*(src1->nb[1]) + i3*(src1->nb[2])); // {d_inner, n_t, n_s} - const float * dt = (const float *) ((const char *) src2->data + ir0*(src2->nb[0]) + i2*(src2->nb[1]) + i3*(src2->nb[2])); // {d_inner, n_t, n_s} - const float * A = (const float *) ((const char *) src3->data + 
ir0*(src3->nb[1])); // {d_state, d_inner} - const float * B = (const float *) ((const char *) src4->data + i2*(src4->nb[1]) + i3*(src4->nb[2])); // {d_state, n_t, n_s} - const float * C = (const float *) ((const char *) src5->data + i2*(src5->nb[1]) + i3*(src5->nb[2])); // {d_state, n_t, n_s} - float * y = ( float *) (( char *) dst->data + ir0*(src1->nb[0]) + i2*(src1->nb[1]) + i3*(src1->nb[2])); // {d_inner, n_t, n_s} - float * s = ( float *) (( char *) dst->data + ir0*(src0->nb[1]) + i3*(src0->nb[2]) + src1->nb[3]); // {d_state, d_inner, n_s} - - // use the output as the source for the next token-wise iterations - if (i2 > 0) { s0 = s; } - - // d_inner - for (int i1 = 0; i1 < ir; ++i1) { - float dt_soft_plus = dt[i1] <= 20.0f ? log1pf(expf(dt[i1])) : dt[i1]; - float x_dt = x[i1] * dt_soft_plus; - svfloat32_t vx_dt = GGML_F32_VEC_SET1(x_dt); - svfloat32_t vdt_soft_plus = GGML_F32_VEC_SET1(dt_soft_plus); - svfloat32_t r1_vector = GGML_F32_VEC_ZERO; - - for (int64_t k = 0; k < nc; k += svcntw()) { - svfloat32_t vA = GGML_F32_VEC_LOAD(&A[i1*nc + k]); - svfloat32_t vB = GGML_F32_VEC_LOAD(&B[k]); - svfloat32_t vC = GGML_F32_VEC_LOAD(&C[k]); - svfloat32_t vs0 = GGML_F32_VEC_LOAD(&s0[i1*nc + k]); - - svfloat32_t t1 = GGML_F32_VEC_MUL(vdt_soft_plus, vA); - t1 = exp_ps_sve(svptrue_b32(), t1); - svfloat32_t t2 = GGML_F32_VEC_MUL(vx_dt, vB); - - vs0 = GGML_F32_VEC_FMA(vs0, t1, t2); - r1_vector = GGML_F32_VEC_ADD(GGML_F32_VEC_MUL(vs0, vC), r1_vector); - - GGML_F32_VEC_STORE(&s[i1*nc + k], vs0); + for (int i3 = 0; i3 < ns; ++i3) { + const float * s0 = (const float *) ((const char *) src0->data + ids[i3]*(src0->nb[3])); // {d_state, dim, nh, ns} + float * s = ( float *) (( char *) dst->data + i3*(src0->nb[3]) + s_off); // {d_state, dim, nh, ns} + + for (int i2 = 0; i2 < nt; ++i2) { + const float * x = (const float *) ((const char *) src1->data + i2*(src1->nb[2]) + i3*(src1->nb[3])); // {dim, nh, nt, ns} + const float * dt = (const float *) ((const char *) src2->data + 
i2*(src2->nb[1]) + i3*(src2->nb[2])); // {nh, nt, ns} + const float * A = (const float *) ((const char *) src3->data); // {d_state, nh} or {1, nh} + const float * B = (const float *) ((const char *) src4->data + i2*(src4->nb[2]) + i3*(src4->nb[3])); // {d_state, ng, nt, ns} + const float * C = (const float *) ((const char *) src5->data + i2*(src5->nb[2]) + i3*(src5->nb[3])); // {d_state, ng, nt, ns} + float * y = ( float *) (( char *) dst->data + i2*(nh*nr*sizeof(float)) + i3*(nt*nh*nr*sizeof(float))); // {dim, nh, nt, ns} + + if (src3->ne[0] == 1) { + // Mamba-2 has a scalar decay factor per head; dA can be outside the state-wise loop + + // n_head + for (int h = ih0; h < ih1; ++h) { + // ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16 + const float dt_soft_plus = dt[h] <= 20.0f ? log1pf(expf(dt[h])) : dt[h]; + const float dA = expf(dt_soft_plus * A[h]); + + // dim + for (int i1 = 0; i1 < nr; ++i1) { + const int ii = i1 + h*nr; + const float x_dt = x[ii] * dt_soft_plus; + float sumf = 0.0f; +#if defined(GGML_SIMD) + #if defined(__ARM_FEATURE_SVE) + const int ggml_f32_epr = svcntw(); + const int ggml_f32_step = 1 * ggml_f32_epr; + + const int np = (nc & ~(ggml_f32_step - 1)); + + GGML_F32_VEC sum = GGML_F32_VEC_ZERO; + + GGML_F32_VEC adA = GGML_F32_VEC_SET1(dA); + GGML_F32_VEC axdt = GGML_F32_VEC_SET1(x_dt); + + for (int i = 0; i < np; i += ggml_f32_step) { + // TODO: maybe unroll more? 
+ for (int j = 0; j < 1; j++) { + GGML_F32_VEC t0 = GGML_F32_VEC_LOAD(s0 + i + j*ggml_f32_epr + ii*nc); + GGML_F32_VEC t1 = GGML_F32_VEC_LOAD(B + i + j*ggml_f32_epr + (h & (ng - 1))*nc); + GGML_F32_VEC t2 = GGML_F32_VEC_LOAD(C + i + j*ggml_f32_epr + (h & (ng - 1))*nc); + + t0 = GGML_F32_VEC_MUL(t0, adA); + t1 = GGML_F32_VEC_MUL(t1, axdt); + + t0 = GGML_F32_VEC_ADD(t0, t1); + + sum = GGML_F32_VEC_FMA(sum, t0, t2); + + GGML_F32_VEC_STORE(s + i + j*ggml_f32_epr + ii*nc, t0); + } + } + + sumf = GGML_F32xt_REDUCE_ONE(sum); + #else + const int np = (nc & ~(GGML_F32_STEP - 1)); + + GGML_F32_VEC sum[GGML_F32_ARR] = { GGML_F32_VEC_ZERO }; + + GGML_F32_VEC adA = GGML_F32_VEC_SET1(dA); + GGML_F32_VEC axdt = GGML_F32_VEC_SET1(x_dt); + + GGML_F32_VEC ax[GGML_F32_ARR]; + GGML_F32_VEC ay[GGML_F32_ARR]; + GGML_F32_VEC az[GGML_F32_ARR]; + + for (int i = 0; i < np; i += GGML_F32_STEP) { + for (int j = 0; j < GGML_F32_ARR; j++) { + ax[j] = GGML_F32_VEC_LOAD(s0 + i + j*GGML_F32_EPR + ii*nc); + ay[j] = GGML_F32_VEC_LOAD(B + i + j*GGML_F32_EPR + (h & (ng - 1))*nc); + az[j] = GGML_F32_VEC_LOAD(C + i + j*GGML_F32_EPR + (h & (ng - 1))*nc); + + ax[j] = GGML_F32_VEC_MUL(ax[j], adA); + ay[j] = GGML_F32_VEC_MUL(ay[j], axdt); + + ax[j] = GGML_F32_VEC_ADD(ax[j], ay[j]); + + sum[j] = GGML_F32_VEC_FMA(sum[j], ax[j], az[j]); + + GGML_F32_VEC_STORE(s + i + j*GGML_F32_EPR + ii*nc, ax[j]); + } + } + + // reduce sum0..sum3 to sum0 + GGML_F32_VEC_REDUCE(sumf, sum); + #endif +#else + const int np = 0; +#endif + // d_state + for (int i0 = np; i0 < nc; ++i0) { + const int i = i0 + ii*nc; + const int ig = i0 + (h & (ng - 1))*nc; + // state = prev_state * dA + dB * x + const float state = (s0[i] * dA) + (B[ig] * x_dt); + // y = rowwise_dotprod(state, C) + sumf += state * C[ig]; + s[i] = state; + } + y[ii] = sumf; } - y[i1] = GGML_F32xt_REDUCE_ONE(r1_vector); } - } - } - #else - for (int i3 = 0; i3 < n_s; ++i3) { - for (int i2 = 0; i2 < n_t; ++i2) { - const float * s0 = (const float *) ((const char *) 
src0->data + ir0*(src0->nb[1]) + i3*(src0->nb[2])); // {d_state, d_inner, n_s} - const float * x = (const float *) ((const char *) src1->data + ir0*(src1->nb[0]) + i2*(src1->nb[1]) + i3*(src1->nb[2])); // {d_inner, n_t, n_s} - const float * dt = (const float *) ((const char *) src2->data + ir0*(src2->nb[0]) + i2*(src2->nb[1]) + i3*(src2->nb[2])); // {d_inner, n_t, n_s} - const float * A = (const float *) ((const char *) src3->data + ir0*(src3->nb[1])); // {d_state, d_inner} - const float * B = (const float *) ((const char *) src4->data + i2*(src4->nb[1]) + i3*(src4->nb[2])); // {d_state, n_t, n_s} - const float * C = (const float *) ((const char *) src5->data + i2*(src5->nb[1]) + i3*(src5->nb[2])); // {d_state, n_t, n_s} - float * y = ( float *) (( char *) dst->data + ir0*(src1->nb[0]) + i2*(src1->nb[1]) + i3*(src1->nb[2])); // {d_inner, n_t, n_s} - float * s = ( float *) (( char *) dst->data + ir0*(src0->nb[1]) + i3*(src0->nb[2]) + src1->nb[3]); // {d_state, d_inner, n_s} - - // use the output as the source for the next token-wise iterations - if (i2 > 0) { s0 = s; } - - // d_inner - for (int i1 = 0; i1 < ir; ++i1) { - // ref: https://github.com/state-spaces/mamba/blob/34076d664838588a3c97727b263478ab9f621a07/mamba_ssm/ops/triton/selective_state_update.py#L78 - float dt_soft_plus = dt[i1] <= 20.0f ? log1pf(expf(dt[i1])) : dt[i1]; - float x_dt = x[i1] * dt_soft_plus; - float sumf = 0.0f; - // d_state - for (int i0 = 0; i0 < nc; ++i0) { - int i = i0 + i1*nc; - // state = prev_state * dA + dB * x - float state = (s0[i] * expf(dt_soft_plus * A[i])) + (B[i0] * x_dt); - // y = rowwise_dotprod(state, C) - sumf += state * C[i0]; - s[i] = state; + } else { + // Mamba-1 has an element-wise decay factor for the states + + // n_head + for (int h = ih0; h < ih1; ++h) { + // ref: https://github.com/state-spaces/mamba/blob/62db608da60f6fc790b8ed9f4b3225e95ca15fde/mamba_ssm/ops/triton/softplus.py#L16 + const float dt_soft_plus = dt[h] <= 20.0f ? 
log1pf(expf(dt[h])) : dt[h]; + + // dim + for (int i1 = 0; i1 < nr; ++i1) { + const int ii = i1 + h*nr; + const float x_dt = x[ii] * dt_soft_plus; +#if defined(__ARM_FEATURE_SVE) + svfloat32_t vx_dt = GGML_F32_VEC_SET1(x_dt); + svfloat32_t vdt_soft_plus = GGML_F32_VEC_SET1(dt_soft_plus); + svfloat32_t r1_vector = GGML_F32_VEC_ZERO; + + // d_state + // TODO: what happens when (d_state % svcntw()) != 0? + for (int64_t k = 0; k < nc; k += svcntw()) { + svfloat32_t vA = GGML_F32_VEC_LOAD(&A[h*nc + k]); + svfloat32_t vB = GGML_F32_VEC_LOAD(&B[k + (h & (ng - 1))*nc]); + svfloat32_t vC = GGML_F32_VEC_LOAD(&C[k + (h & (ng - 1))*nc]); + svfloat32_t vs0 = GGML_F32_VEC_LOAD(&s0[ii*nc + k]); + + svfloat32_t t1 = GGML_F32_VEC_MUL(vdt_soft_plus, vA); + t1 = exp_ps_sve(svptrue_b32(), t1); + svfloat32_t t2 = GGML_F32_VEC_MUL(vx_dt, vB); + + vs0 = GGML_F32_VEC_FMA(t2, vs0, t1); + r1_vector = GGML_F32_VEC_ADD(GGML_F32_VEC_MUL(vs0, vC), r1_vector); + + GGML_F32_VEC_STORE(&s[ii*nc + k], vs0); + } + y[ii] = GGML_F32xt_REDUCE_ONE(r1_vector); +#else + float sumf = 0.0f; + // NOTE: can't really use GGML_SIMD here because d_state is usually 16 + // and also because expf is used within the loop. 
+ // d_state + for (int i0 = 0; i0 < nc; ++i0) { + const int i = i0 + ii*nc; + const int ig = i0 + (h & (ng - 1))*nc; + // state = prev_state * dA + dB * x + const float state = (s0[i] * expf(dt_soft_plus * A[i0 + h*nc])) + (B[ig] * x_dt); + // y = rowwise_dotprod(state, C) + sumf += state * C[ig]; + s[i] = state; + } + y[ii] = sumf; +#endif } - y[i1] = sumf; } } + // use the output as the source when it's not the first token-wise iteration + s0 = s; } - #endif + } } void ggml_compute_forward_ssm_scan( @@ -7927,6 +9059,42 @@ void ggml_compute_forward_unary( } } +//ggml_compute_forward_glu + +void ggml_compute_forward_glu( + const ggml_compute_params * params, + ggml_tensor * dst) { + + const ggml_glu_op op = ggml_get_glu_op(dst); + + switch (op) { + case GGML_GLU_OP_REGLU: + { + ggml_compute_forward_reglu(params, dst); + } break; + case GGML_GLU_OP_GEGLU: + { + ggml_compute_forward_geglu(params, dst); + } break; + case GGML_GLU_OP_SWIGLU: + { + ggml_compute_forward_swiglu(params, dst); + } break; + case GGML_GLU_OP_GEGLU_ERF: + { + ggml_compute_forward_geglu_erf(params, dst); + } break; + case GGML_GLU_OP_GEGLU_QUICK: + { + ggml_compute_forward_geglu_quick(params, dst); + } break; + default: + { + GGML_ABORT("fatal error"); + } + } +} + // ggml_compute_forward_get_rel_pos static void ggml_compute_forward_get_rel_pos_f16( diff --git a/ggml/src/ggml-cpu/ops.h b/ggml/src/ggml-cpu/ops.h index dc081b9e66397..3a32ec20dba2b 100644 --- a/ggml/src/ggml-cpu/ops.h +++ b/ggml/src/ggml-cpu/ops.h @@ -20,6 +20,9 @@ static const size_t CACHE_LINE_SIZE_F32 = CACHE_LINE_SIZE/sizeof(float); +// Work buffer size for im2col operations in CONV2D +#define GGML_IM2COL_WORK_SIZE (16 * 1024 * 1024) + #ifdef __cplusplus extern "C" { #endif @@ -53,6 +56,7 @@ void ggml_compute_forward_permute(const struct ggml_compute_params * params, str void ggml_compute_forward_transpose(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_get_rows(const struct 
ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_get_rows_back(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_set_rows(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_diag(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_diag_mask_inf(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_diag_mask_zero(const struct ggml_compute_params * params, struct ggml_tensor * dst); @@ -64,6 +68,7 @@ void ggml_compute_forward_clamp(const struct ggml_compute_params * params, struc void ggml_compute_forward_conv_transpose_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_im2col(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_im2col_back_f32(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_conv_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_conv_transpose_2d(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_conv_2d_dw(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_pool_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst); @@ -72,6 +77,7 @@ void ggml_compute_forward_pool_2d_back(const struct ggml_compute_params * params void ggml_compute_forward_upscale(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_pad(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_pad_reflect_1d(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_roll(const struct ggml_compute_params * params, struct ggml_tensor * dst); void 
ggml_compute_forward_arange(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_timestep_embedding(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_argsort(const struct ggml_compute_params * params, struct ggml_tensor * dst); @@ -92,6 +98,7 @@ void ggml_compute_forward_ssm_scan(const struct ggml_compute_params * params, st void ggml_compute_forward_win_part(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_win_unpart(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_unary(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_glu(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_get_rel_pos(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_add_rel_pos(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_rwkv_wkv6(const struct ggml_compute_params * params, struct ggml_tensor * dst); @@ -104,6 +111,7 @@ void ggml_compute_forward_custom(const struct ggml_compute_params * params, stru void ggml_compute_forward_cross_entropy_loss(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_cross_entropy_loss_back(const struct ggml_compute_params * params, struct ggml_tensor * dst); void ggml_compute_forward_opt_step_adamw(const struct ggml_compute_params * params, struct ggml_tensor * dst); +void ggml_compute_forward_mul_mat(const struct ggml_compute_params * params, struct ggml_tensor * dst); #ifdef __cplusplus } diff --git a/ggml/src/ggml-cpu/quants.c b/ggml/src/ggml-cpu/quants.c index 1ca9c50e724a3..ee35ab42fda07 100644 --- a/ggml/src/ggml-cpu/quants.c +++ b/ggml/src/ggml-cpu/quants.c @@ -2,9 +2,12 @@ #include "ggml-common.h" #include "ggml-cpu-impl.h" +#include "simd-mappings.h" 
#include "ggml-quants.h" #include "quants.h" +#include "arch-fallback.h" + #include #include #include @@ -38,12 +41,10 @@ void quantize_row_q5_1(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, in void quantize_row_q8_0_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { quantize_row_q8_0_ref(x, y, k); } -GGML_CPU_NATIVE_IMPL(quantize_row_q8_0) void quantize_row_q8_1_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { quantize_row_q8_1_ref(x, y, k); } -GGML_CPU_NATIVE_IMPL(quantize_row_q8_1) // // 2-6 bit quantization in super-blocks @@ -104,7 +105,6 @@ void quantize_row_tq2_0(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, void quantize_row_q8_K_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k) { quantize_row_q8_K_ref(x, y, k); } -GGML_CPU_NATIVE_IMPL(quantize_row_q8_K) //===================================== Dot products ================================= @@ -138,12 +138,11 @@ void ggml_vec_dot_q4_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c } int sumi = sumi0 + sumi1; - sumf += sumi*GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d); + sumf += sumi*GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d); } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_0_q8_0) // TODO: add WASM SIMD void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { @@ -176,12 +175,11 @@ void ggml_vec_dot_q4_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_1_q8_1) void ggml_vec_dot_q5_0_q8_0_generic(int n, 
float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { const int qk = QK8_0; @@ -220,12 +218,11 @@ void ggml_vec_dot_q5_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)) * sumi; + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)) * sumi; } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_0_q8_0) void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { const int qk = QK8_1; @@ -264,12 +261,11 @@ void ggml_vec_dot_q5_1_q8_1_generic(int n, float * GGML_RESTRICT s, size_t bs, c } int sumi = sumi0 + sumi1; - sumf += (GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d))*sumi + GGML_FP16_TO_FP32(x[ib].m)*GGML_FP16_TO_FP32(y[ib].s); + sumf += (GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d))*sumi + GGML_CPU_FP16_TO_FP32(x[ib].m)*GGML_CPU_FP16_TO_FP32(y[ib].s); } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_1_q8_1) void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { const int qk = QK8_0; @@ -295,12 +291,11 @@ void ggml_vec_dot_q8_0_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, c sumi += x[ib].qs[j]*y[ib].qs[j]; } - sumf += sumi*(GGML_FP16_TO_FP32(x[ib].d)*GGML_FP16_TO_FP32(y[ib].d)); + sumf += sumi*(GGML_CPU_FP16_TO_FP32(x[ib].d)*GGML_CPU_FP16_TO_FP32(y[ib].d)); } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q8_0_q8_0) void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(nrc == 1); @@ -348,12 +343,11 @@ void ggml_vec_dot_tq1_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t 
bs, } } - sumf += (float) sum * (GGML_FP16_TO_FP32(x[i].d) * y[i].d); + sumf += (float) sum * (GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d); } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq1_0_q8_K) void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(nrc == 1); @@ -379,14 +373,13 @@ void ggml_vec_dot_tq2_0_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, } } - const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d); + const float d = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); sumf += (float) sumi * d; } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_tq2_0_q8_K) void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(nrc == 1); @@ -413,8 +406,8 @@ void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c summs += y[i].bsums[j] * (sc[j] >> 4); } - const float dall = y[i].d * GGML_FP16_TO_FP32(x[i].d); - const float dmin = y[i].d * GGML_FP16_TO_FP32(x[i].dmin); + const float dall = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].d); + const float dmin = y[i].d * GGML_CPU_FP16_TO_FP32(x[i].dmin); int isum = 0; int is = 0; @@ -439,7 +432,6 @@ void ggml_vec_dot_q2_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q2_K_q8_K) void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -513,13 +505,12 @@ void ggml_vec_dot_q3_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c for (int l = 0; l < 8; ++l) aux32[l] += (scales[j] - 32) * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += 
d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q3_K_q8_K) void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -587,15 +578,14 @@ void ggml_vec_dot_q4_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q4_K_q8_K) void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -668,15 +658,14 @@ void ggml_vec_dot_q5_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, c for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; - const float dmin = GGML_FP16_TO_FP32(x[i].dmin) * y[i].d; + const float dmin = GGML_CPU_FP16_TO_FP32(x[i].dmin) * y[i].d; sumf -= dmin * sumi; } for (int l = 0; l < 8; ++l) sumf += sums[l]; *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q5_K_q8_K) void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -726,13 +715,12 @@ void ggml_vec_dot_q6_K_q8_K_generic(int n, float * GGML_RESTRICT s, size_t 
bs, c for (int l = 0; l < 8; ++l) aux32[l] += scale * aux16[l]; q8 += 8; a += 8; } - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; for (int l = 0; l < 8; ++l) sums[l] += d * aux32[l]; } for (int l = 0; l < 8; ++l) sumf += sums[l]; *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_q6_K_q8_K) void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -752,7 +740,7 @@ void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const int8_t * GGML_RESTRICT q8 = y[i].qs; int32_t bsum = 0; @@ -775,7 +763,6 @@ void ggml_vec_dot_iq2_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs } *s = 0.125f * sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xxs_q8_K) void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -792,7 +779,7 @@ void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint16_t * GGML_RESTRICT q2 = x[i].qs; const uint8_t * GGML_RESTRICT sc = x[i].scales; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -826,7 +813,6 @@ void ggml_vec_dot_iq2_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, } *s = 0.125f * sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_xs_q8_K) void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void 
* GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -844,7 +830,7 @@ void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, float sumf = 0; for (int i = 0; i < nb; i++) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const int8_t * q8 = y[i].qs; const uint8_t * qs = x[i].qs; const uint8_t * qh = x[i].qh; @@ -879,7 +865,6 @@ void ggml_vec_dot_iq2_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, *s = 0.125f * sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq2_s_q8_K) void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -898,7 +883,7 @@ void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT q3 = x[i].qs; const uint8_t * GGML_RESTRICT gas = x[i].qs + QK_K/4; const int8_t * GGML_RESTRICT q8 = y[i].qs; @@ -924,7 +909,6 @@ void ggml_vec_dot_iq3_xxs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs } *s = 0.25f * sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_xxs_q8_K) void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -941,7 +925,7 @@ void ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, float sumf = 0.f; for (int i = 0; i < nb; ++i) { - const float d = GGML_FP16_TO_FP32(x[i].d) * y[i].d; + const float d = GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d; const uint8_t * GGML_RESTRICT qs = x[i].qs; const uint8_t * GGML_RESTRICT qh = x[i].qh; const uint8_t * GGML_RESTRICT signs = x[i].signs; @@ -981,7 +965,6 @@ void 
ggml_vec_dot_iq3_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq3_s_q8_K) void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -1020,12 +1003,11 @@ void ggml_vec_dot_iq1_s_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, qs += 4; } - sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); + sumf += GGML_CPU_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1); } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_s_q8_K) void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(n % QK_K == 0); @@ -1082,12 +1064,11 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, qh += 2; } - sumf += GGML_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2); + sumf += GGML_CPU_FP16_TO_FP32(scale.f16) * y[i].d * (sumi1 + IQ1M_DELTA * sumi2); } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq1_m_q8_K) void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(nrc == 1); @@ -1107,7 +1088,7 @@ void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, float sumf = 0; for (; ib < nb; ++ib) { - const float d = GGML_FP16_TO_FP32(y[ib].d)*GGML_FP16_TO_FP32(x[ib].d); + const float d = GGML_CPU_FP16_TO_FP32(y[ib].d)*GGML_CPU_FP16_TO_FP32(x[ib].d); int sumi1 = 0, sumi2 = 0; for (int j = 0; j < QK4_NL/2; ++j) { sumi1 += y[ib].qs[j+ 0] * kvalues_iq4nl[x[ib].qs[j] & 0xf]; @@ -1117,7 +1098,6 @@ void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_nl_q8_0) void 
ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc) { assert(nrc == 1); @@ -1134,7 +1114,7 @@ void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, float sumf = 0; for (int ibl = 0; ibl < nb; ++ibl) { - const float d4d8 = GGML_FP16_TO_FP32(x[ibl].d) * y[ibl].d; + const float d4d8 = GGML_CPU_FP16_TO_FP32(x[ibl].d) * y[ibl].d; uint16_t h = x[ibl].scales_h; const uint8_t * qs = x[ibl].qs; const int8_t * q8 = y[ibl].qs; @@ -1164,7 +1144,6 @@ void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, } *s = sumf; } -GGML_CPU_NATIVE_IMPL(ggml_vec_dot_iq4_xs_q8_K) // ============================ 4-bit non-linear quants diff --git a/ggml/src/ggml-cpu/quants.h b/ggml/src/ggml-cpu/quants.h index d729e07d633f5..dc4342c87f592 100644 --- a/ggml/src/ggml-cpu/quants.h +++ b/ggml/src/ggml-cpu/quants.h @@ -84,33 +84,6 @@ void ggml_vec_dot_iq1_m_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, void ggml_vec_dot_iq4_nl_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); void ggml_vec_dot_iq4_xs_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc); -#if defined(GGML_CPU_GENERIC) -#define quantize_row_q8_0_generic quantize_row_q8_0 -#define quantize_row_q8_1_generic quantize_row_q8_1 -#define quantize_row_q8_K_generic quantize_row_q8_K -#define ggml_vec_dot_q4_0_q8_0_generic ggml_vec_dot_q4_0_q8_0 -#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1 -#define ggml_vec_dot_q5_0_q8_0_generic ggml_vec_dot_q5_0_q8_0 -#define ggml_vec_dot_q5_1_q8_1_generic ggml_vec_dot_q5_1_q8_1 -#define ggml_vec_dot_q8_0_q8_0_generic ggml_vec_dot_q8_0_q8_0 -#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K -#define 
ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K -#define ggml_vec_dot_q2_K_q8_K_generic ggml_vec_dot_q2_K_q8_K -#define ggml_vec_dot_q3_K_q8_K_generic ggml_vec_dot_q3_K_q8_K -#define ggml_vec_dot_q4_K_q8_K_generic ggml_vec_dot_q4_K_q8_K -#define ggml_vec_dot_q5_K_q8_K_generic ggml_vec_dot_q5_K_q8_K -#define ggml_vec_dot_q6_K_q8_K_generic ggml_vec_dot_q6_K_q8_K -#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K -#define ggml_vec_dot_iq2_xs_q8_K_generic ggml_vec_dot_iq2_xs_q8_K -#define ggml_vec_dot_iq2_s_q8_K_generic ggml_vec_dot_iq2_s_q8_K -#define ggml_vec_dot_iq3_xxs_q8_K_generic ggml_vec_dot_iq3_xxs_q8_K -#define ggml_vec_dot_iq3_s_q8_K_generic ggml_vec_dot_iq3_s_q8_K -#define ggml_vec_dot_iq1_s_q8_K_generic ggml_vec_dot_iq1_s_q8_K -#define ggml_vec_dot_iq1_m_q8_K_generic ggml_vec_dot_iq1_m_q8_K -#define ggml_vec_dot_iq4_nl_q8_0_generic ggml_vec_dot_iq4_nl_q8_0 -#define ggml_vec_dot_iq4_xs_q8_K_generic ggml_vec_dot_iq4_xs_q8_K -#endif - #ifdef __cplusplus } #endif diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index 628142d5f630a..72ee93a5abc7c 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -6,8 +6,11 @@ #include "ggml-impl.h" #include "ggml-cpu.h" #include "ggml-cpu-impl.h" +#include "simd-mappings.h" #include "traits.h" +#include "arch-fallback.h" + #include #include #include @@ -70,7 +73,7 @@ void ggml_quantize_mat_q8_0_4x4_generic(const float * GGML_RESTRICT x, void * GG const float d = amax / ((1 << 7) - 1); id[row_iter] = d ? 
1.0f / d : 0.0f; - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); } for (int j = 0; j < QK8_0 * 4; j++) { @@ -83,7 +86,6 @@ void ggml_quantize_mat_q8_0_4x4_generic(const float * GGML_RESTRICT x, void * GG } } } -GGML_CPU_NATIVE_IMPL(ggml_quantize_mat_q8_0_4x4) void ggml_quantize_mat_q8_0_4x8_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) { assert(QK8_0 == 32); @@ -109,7 +111,7 @@ void ggml_quantize_mat_q8_0_4x8_generic(const float * GGML_RESTRICT x, void * GG const float d = amax / ((1 << 7) - 1); id[row_iter] = d ? 1.0f / d : 0.0f; - y[i].d[row_iter] = GGML_FP32_TO_FP16(d); + y[i].d[row_iter] = GGML_CPU_FP32_TO_FP16(d); } for (int j = 0; j < QK8_0 * 4; j++) { @@ -122,7 +124,6 @@ void ggml_quantize_mat_q8_0_4x8_generic(const float * GGML_RESTRICT x, void * GG } } } -GGML_CPU_NATIVE_IMPL(ggml_quantize_mat_q8_0_4x8) void ggml_quantize_mat_q8_K_4x8_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) { assert(QK_K == 256); @@ -174,7 +175,6 @@ void ggml_quantize_mat_q8_K_4x8_generic(const float * GGML_RESTRICT x, void * GG } } } -GGML_CPU_NATIVE_IMPL(ggml_quantize_mat_q8_K_4x8) } // extern "C" @@ -237,14 +237,13 @@ void ggml_gemv_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j]; } } -GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_0_4x4_q8_0) void ggml_gemv_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = 
QK8_0; @@ -282,14 +281,13 @@ void ggml_gemv_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } for (int j = 0; j < ncols_interleaved; j++) s[x * ncols_interleaved + j] = sumf[j]; } } -GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_0_4x8_q8_0) void ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK8_0; @@ -328,7 +326,7 @@ void ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const int v1 = (int8_t) (b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] & 0xF0); sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])) >> 4; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -336,7 +334,6 @@ void ggml_gemv_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, } } } -GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_0_8x8_q8_0) void ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK_K; @@ -400,13 +397,13 @@ void ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, sumi2 = sumi2 * scales_1[j]; sumi += sumi1 + sumi2; } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d; + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d; } } for (int sb = 0; sb < 8; sb++) { uint8_t *mins = (uint8_t*) utmp 
+ 8 + sb * 16; for (int j = 0; j < ncols_interleaved; j++) { - sum_minf[j] += mins[j] * (a_ptr[l].bsums[sb * 2] + a_ptr[l].bsums[sb * 2 + 1]) * GGML_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d; + sum_minf[j] += mins[j] * (a_ptr[l].bsums[sb * 2] + a_ptr[l].bsums[sb * 2 + 1]) * GGML_CPU_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d; } } } @@ -415,7 +412,6 @@ void ggml_gemv_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, } } } -GGML_CPU_NATIVE_IMPL(ggml_gemv_q4_K_8x8_q8_K) void ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK8_0; @@ -454,7 +450,7 @@ void ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs const int v1 = kvalues_iq4nl[b_ptr[l].qs[k * ncols_interleaved * blocklen + j * blocklen + i] >> 4]; sumi += ((v0 * a_ptr[l].qs[k * blocklen + i]) + (v1 * a_ptr[l].qs[k * blocklen + i + qk / 2])); } - sumf[j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d); + sumf[j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d); } } } @@ -462,7 +458,6 @@ void ggml_gemv_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs } } } -GGML_CPU_NATIVE_IMPL(ggml_gemv_iq4_nl_4x4_q8_0) void ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK8_0; @@ -506,7 +501,7 @@ void ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -519,7 +514,6 @@ void 
ggml_gemm_q4_0_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, } } } -GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_0_4x4_q8_0) void ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK8_0; @@ -562,7 +556,7 @@ void ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -574,7 +568,6 @@ void ggml_gemm_q4_0_4x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, } } } -GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_0_4x8_q8_0) void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK8_0; @@ -617,7 +610,7 @@ void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])) >> 4; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -629,7 +622,6 @@ void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, } } } -GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_0_8x8_q8_0) void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK_K; @@ -697,7 +689,7 @@ void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, sumi2 = sumi2 * scales_1[j]; sumi 
+= sumi1 + sumi2; } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d[m]; + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * a_ptr[l].d[m]; } } } @@ -706,7 +698,7 @@ void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, for(int m = 0; m < 4; m++) { const int16_t *bsums = a_ptr[l].bsums + (sb * 8) + (m * 4) - ((sb % 2) * 6); for(int j = 0; j < ncols_interleaved; j++) { - sum_minf[m][j] += mins[j] * (bsums[0] + bsums[1]) * GGML_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d[m]; + sum_minf[m][j] += mins[j] * (bsums[0] + bsums[1]) * GGML_CPU_FP16_TO_FP32(b_ptr[l].dmin[j]) * a_ptr[l].d[m]; } } } @@ -719,7 +711,6 @@ void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, } } } -GGML_CPU_NATIVE_IMPL(ggml_gemm_q4_K_8x8_q8_K) void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc) { const int qk = QK8_0; @@ -763,7 +754,7 @@ void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs sumi += ((v0 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i]) + (v1 * a_ptr[l].qs[k * 4 * blocklen + m * blocklen + i + qk / 2 * 4])); } - sumf[m][j] += sumi * GGML_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_FP16_TO_FP32(a_ptr[l].d[m]); + sumf[m][j] += sumi * GGML_CPU_FP16_TO_FP32(b_ptr[l].d[j]) * GGML_CPU_FP16_TO_FP32(a_ptr[l].d[m]); } } } @@ -776,7 +767,6 @@ void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs } } } -GGML_CPU_NATIVE_IMPL(ggml_gemm_iq4_nl_4x4_q8_0) } // extern "C" @@ -1174,13 +1164,24 @@ template op) { case GGML_OP_MUL_MAT: - size = ggml_row_size(PARAM_TYPE, ggml_nelements(op->src[1])); - return true; + { + size = ggml_row_size(PARAM_TYPE, ggml_nelements(op->src[1])); + return true; + } case GGML_OP_MUL_MAT_ID: - size = ggml_row_size(PARAM_TYPE, ggml_nelements(op->src[1])); - size = GGML_PAD(size, sizeof(int64_t)); // + padding for next bloc. 
- size += sizeof(int64_t) * (1+op->src[0]->ne[2]) * op->src[1]->ne[2]; - return true; + { + size = ggml_row_size(PARAM_TYPE, ggml_nelements(op->src[1])); + size = GGML_PAD(size, sizeof(int64_t)); // + padding for next bloc. + + const int64_t ne02 = op->src[0]->ne[2]; // n_as, n_expert + const int64_t ne12 = op->src[1]->ne[2]; // n_tokens + + const size_t sizeof_mmid_row_mapping = sizeof(int64_t); + + size += sizeof_mmid_row_mapping*ne02*(ne12 + 1); + + return true; + } default: // GGML_ABORT("fatal error"); break; @@ -1316,14 +1317,17 @@ template wsize >= (GGML_PAD(nbw3, sizeof(int64_t)) + n_as * sizeof(int64_t) + - n_as * ne12 * sizeof(mmid_row_mapping))); + GGML_ASSERT(params->wsize >= + (GGML_PAD(nbw3, sizeof(int64_t)) + + n_as*(ne12 + 1)*sizeof(mmid_row_mapping)) + ); - auto * wdata = (char *) params->wdata; - auto * wdata_src1_end = (char *) wdata + GGML_PAD(nbw3, sizeof(int64_t)); - auto * matrix_row_counts = (int64_t *) (wdata_src1_end); // [n_as] + auto * wdata = (char *)params->wdata; + auto * wdata_src1_end = (char *)wdata + GGML_PAD(nbw3, sizeof(int64_t)); - struct mmid_row_mapping * matrix_rows = (struct mmid_row_mapping *) (matrix_row_counts + n_as); // [n_as][ne12] + // total of [n_as][ne12 + 1] elemets of type mmid_row_mapping (2*int32_t = int64_t) + auto * matrix_row_counts = (int64_t *) (wdata_src1_end); // [n_as] + struct mmid_row_mapping * matrix_rows = (struct mmid_row_mapping *) (matrix_row_counts + n_as); // [n_as][ne12] // src1: float32 => param type for (int64_t i12 = 0; i12 < ne12; ++i12) { @@ -1408,44 +1412,45 @@ template q4_0_4x4_q8_0; -static const tensor_traits q4_0_4x8_q8_0; -static const tensor_traits q4_0_8x8_q8_0; -static const tensor_traits q4_K_8x8_q8_K; - -// instance for IQ4 -static const tensor_traits iq4_nl_4x4_q8_0; - } // namespace ggml::cpu::repack static const ggml::cpu::tensor_traits * ggml_repack_get_optimal_repack_type(const struct ggml_tensor * cur) { + + // instance for Q4 + static const 
ggml::cpu::repack::tensor_traits q4_0_4x4_q8_0; + static const ggml::cpu::repack::tensor_traits q4_0_4x8_q8_0; + static const ggml::cpu::repack::tensor_traits q4_0_8x8_q8_0; + static const ggml::cpu::repack::tensor_traits q4_K_8x8_q8_K; + + // instance for IQ4 + static const ggml::cpu::repack::tensor_traits iq4_nl_4x4_q8_0; + if (cur->type == GGML_TYPE_Q4_0) { if (ggml_cpu_has_avx2() || (ggml_cpu_has_sve() && ggml_cpu_has_matmul_int8() && ggml_cpu_get_sve_cnt() == QK8_0)) { if (cur->ne[1] % 8 == 0) { - return &ggml::cpu::repack::q4_0_8x8_q8_0; + return &q4_0_8x8_q8_0; } } if (ggml_cpu_has_neon() && ggml_cpu_has_matmul_int8()) { if (cur->ne[1] % 4 == 0) { - return &ggml::cpu::repack::q4_0_4x8_q8_0; + return &q4_0_4x8_q8_0; } } if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { if (cur->ne[1] % 4 == 0) { - return &ggml::cpu::repack::q4_0_4x4_q8_0; + return &q4_0_4x4_q8_0; } } } else if (cur->type == GGML_TYPE_Q4_K) { if (ggml_cpu_has_avx2()) { if (cur->ne[1] % 8 == 0) { - return &ggml::cpu::repack::q4_K_8x8_q8_K; + return &q4_K_8x8_q8_K; } } } else if (cur->type == GGML_TYPE_IQ4_NL) { if (ggml_cpu_has_neon() && ggml_cpu_has_dotprod()) { if (cur->ne[1] % 4 == 0) { - return &ggml::cpu::repack::iq4_nl_4x4_q8_0; + return &iq4_nl_4x4_q8_0; } } } diff --git a/ggml/src/ggml-cpu/repack.h b/ggml/src/ggml-cpu/repack.h index 8ee6e92ea96b8..4421e5f8e7046 100644 --- a/ggml/src/ggml-cpu/repack.h +++ b/ggml/src/ggml-cpu/repack.h @@ -64,10 +64,6 @@ static_assert(sizeof(block_iq4_nlx4) == 4 * sizeof(ggml_half) + QK4_NL * 2, "wro extern "C" { #endif -// Workaround for clang: -// clang++ complains: ``error: call to 'ggml_gemm_q4_0_4x4_q8_0' is ambiguous'' -// repro: https://godbolt.org/z/oKdeWKonM (ICE), https://godbolt.org/z/1szq6P36v (ambiguous call) -#if defined(GGML_CPU_CLANG_WORKAROUND) || !(defined(__GNUC__) && defined(__clang__)) || defined(__HIPCC__) void ggml_quantize_mat_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); void 
ggml_quantize_mat_q8_0_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); void ggml_quantize_mat_q8_K_4x8(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); @@ -81,7 +77,6 @@ void ggml_gemm_q4_0_4x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const vo void ggml_gemm_q4_0_8x8_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); void ggml_gemm_q4_K_8x8_q8_K(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); void ggml_gemm_iq4_nl_4x4_q8_0(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); -#endif // !defined(__clang__) // Native implementations void ggml_quantize_mat_q8_0_4x4_generic(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k); @@ -98,22 +93,6 @@ void ggml_gemm_q4_0_8x8_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, void ggml_gemm_q4_K_8x8_q8_K_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); void ggml_gemm_iq4_nl_4x4_q8_0_generic(int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, const void * GGML_RESTRICT vy, int nr, int nc); -#if defined(GGML_CPU_GENERIC) -#define ggml_quantize_mat_q8_0_4x4_generic ggml_quantize_mat_q8_0_4x4 -#define ggml_quantize_mat_q8_0_4x8_generic ggml_quantize_mat_q8_0_4x8 -#define ggml_quantize_mat_q8_K_4x8_generic ggml_quantize_mat_q8_K_4x8 -#define ggml_gemv_q4_0_4x4_q8_0_generic ggml_gemv_q4_0_4x4_q8_0 -#define ggml_gemv_q4_0_4x8_q8_0_generic ggml_gemv_q4_0_4x8_q8_0 -#define ggml_gemv_q4_0_8x8_q8_0_generic ggml_gemv_q4_0_8x8_q8_0 -#define ggml_gemv_q4_K_8x8_q8_K_generic ggml_gemv_q4_K_8x8_q8_K -#define ggml_gemv_iq4_nl_4x4_q8_0_generic ggml_gemv_iq4_nl_4x4_q8_0 -#define ggml_gemm_q4_0_4x4_q8_0_generic ggml_gemm_q4_0_4x4_q8_0 -#define 
ggml_gemm_q4_0_4x8_q8_0_generic ggml_gemm_q4_0_4x8_q8_0 -#define ggml_gemm_q4_0_8x8_q8_0_generic ggml_gemm_q4_0_8x8_q8_0 -#define ggml_gemm_q4_K_8x8_q8_K_generic ggml_gemm_q4_K_8x8_q8_K -#define ggml_gemm_iq4_nl_4x4_q8_0_generic ggml_gemm_iq4_nl_4x4_q8_0 -#endif - #if defined(__cplusplus) } // extern "C" #endif diff --git a/ggml/src/ggml-cpu/simd-mappings.h b/ggml/src/ggml-cpu/simd-mappings.h index 2e3669c0186c9..b4ad68c9fd647 100644 --- a/ggml/src/ggml-cpu/simd-mappings.h +++ b/ggml/src/ggml-cpu/simd-mappings.h @@ -2,10 +2,167 @@ #include "ggml-cpu-impl.h" +#ifdef __ARM_FEATURE_SVE +#include +#endif // __ARM_FEATURE_SVE + +#if defined(__ARM_NEON) && !defined(__CUDACC__) && !defined(__MUSACC__) +// if YCM cannot find , make a symbolic link to it, for example: +// +// $ ln -sfn /Library/Developer/CommandLineTools/usr/lib/clang/13.1.6/include/arm_neon.h ./src/ +// +#include +#endif + +#if defined(__F16C__) +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + // // simd mappings // +// FP16 to FP32 conversion + +// 16-bit float +// on Arm, we use __fp16 +// on x86, we use uint16_t +// +// for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616 +// for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843 +// +#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__) + #define GGML_CPU_COMPUTE_FP16_TO_FP32(x) neon_compute_fp16_to_fp32(x) + #define GGML_CPU_COMPUTE_FP32_TO_FP16(x) neon_compute_fp32_to_fp16(x) + + #define GGML_CPU_FP16_TO_FP32(x) GGML_CPU_COMPUTE_FP16_TO_FP32(x) + + static inline float neon_compute_fp16_to_fp32(ggml_fp16_t h) { + __fp16 tmp; + memcpy(&tmp, &h, sizeof(ggml_fp16_t)); + return (float)tmp; + } + + static inline ggml_fp16_t neon_compute_fp32_to_fp16(float f) { + ggml_fp16_t res; + __fp16 tmp = f; + memcpy(&res, &tmp, sizeof(ggml_fp16_t)); + return res; + } +#elif defined(__F16C__) + #ifdef _MSC_VER + #define 
GGML_CPU_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x))) + #define GGML_CPU_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0) + #else + #define GGML_CPU_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x) + #define GGML_CPU_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0) + #endif +#elif defined(__POWER9_VECTOR__) + #define GGML_CPU_COMPUTE_FP16_TO_FP32(x) power_compute_fp16_to_fp32(x) + #define GGML_CPU_COMPUTE_FP32_TO_FP16(x) power_compute_fp32_to_fp16(x) + /* the inline asm below is about 12% faster than the lookup method */ + #define GGML_CPU_FP16_TO_FP32(x) GGML_CPU_COMPUTE_FP16_TO_FP32(x) + #define GGML_CPU_FP32_TO_FP16(x) GGML_CPU_COMPUTE_FP32_TO_FP16(x) + + static inline float power_compute_fp16_to_fp32(ggml_fp16_t h) { + float f; + double d; + __asm__( + "mtfprd %0,%2\n" + "xscvhpdp %0,%0\n" + "frsp %1,%0\n" : + /* temp */ "=d"(d), + /* out */ "=f"(f): + /* in */ "r"(h)); + return f; + } + + static inline ggml_fp16_t power_compute_fp32_to_fp16(float f) { + double d; + ggml_fp16_t r; + __asm__( /* xscvdphp can work on double or single precision */ + "xscvdphp %0,%2\n" + "mffprd %1,%0\n" : + /* temp */ "=d"(d), + /* out */ "=r"(r): + /* in */ "f"(f)); + return r; + } +#elif defined(__riscv) && defined(__riscv_zfhmin) + static inline float riscv_compute_fp16_to_fp32(ggml_fp16_t h) { + float f; + __asm__( + "fmv.h.x %[f], %[h]\n\t" + "fcvt.s.h %[f], %[f]" + : [f] "=&f" (f) + : [h] "r" (h) + ); + return f; + } + + static inline ggml_fp16_t riscv_compute_fp32_to_fp16(float f) { + ggml_fp16_t res; + __asm__( + "fcvt.h.s %[f], %[f]\n\t" + "fmv.x.h %[h], %[f]" + : [h] "=&r" (res) + : [f] "f" (f) + ); + return res; + } + + #define GGML_CPU_COMPUTE_FP16_TO_FP32(x) riscv_compute_fp16_to_fp32(x) + #define GGML_CPU_COMPUTE_FP32_TO_FP16(x) riscv_compute_fp32_to_fp16(x) + #define GGML_CPU_FP16_TO_FP32(x) GGML_CPU_COMPUTE_FP16_TO_FP32(x) + #define GGML_CPU_FP32_TO_FP16(x) GGML_CPU_COMPUTE_FP32_TO_FP16(x) +#elif defined(__NNPA__) + #define 
GGML_CPU_COMPUTE_FP16_TO_FP32(x) nnpa_compute_fp16_to_fp32(x) + #define GGML_CPU_COMPUTE_FP32_TO_FP16(x) nnpa_compute_fp32_to_fp16(x) + + #define GGML_CPU_FP16_TO_FP32(x) GGML_CPU_COMPUTE_FP16_TO_FP32(x) + #define GGML_CPU_FP32_TO_FP16(x) GGML_CPU_COMPUTE_FP32_TO_FP16(x) + + static inline float nnpa_compute_fp16_to_fp32(ggml_fp16_t h) { + uint16x8_t v_h = vec_splats(h); + uint16x8_t v_hd = vec_convert_from_fp16(v_h, 0); + return vec_extend_to_fp32_hi(v_hd, 0)[0]; + } + + static inline ggml_fp16_t nnpa_compute_fp32_to_fp16(float f) { + float32x4_t v_f = vec_splats(f); + float32x4_t v_zero = vec_splats(0.0f); + uint16x8_t v_hd = vec_round_from_fp32(v_f, v_zero, 0); + uint16x8_t v_h = vec_convert_to_fp16(v_hd, 0); + return vec_extract(v_h, 0); + } +#endif + +// precomputed f32 table for f16 (256 KB) +// defined in ggml-cpu.c, initialized in ggml_cpu_init() +extern float ggml_table_f32_f16[1 << 16]; + +// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32, +// so we define GGML_CPU_FP16_TO_FP32 and GGML_CPU_FP32_TO_FP16 elsewhere for NEON. +// This is also true for POWER9. +#if !defined(GGML_CPU_FP16_TO_FP32) +inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) { + uint16_t s; + memcpy(&s, &f, sizeof(uint16_t)); + return ggml_table_f32_f16[s]; +} + +#define GGML_CPU_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x) +#endif + +#if !defined(GGML_CPU_FP32_TO_FP16) +#define GGML_CPU_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) +#endif + + // we define a common set of C macros which map to specific intrinsics based on the current architecture // we then implement the fundamental computation operations below using only these macros // adding support for new architectures requires to define the corresponding SIMD macros @@ -32,7 +189,7 @@ #define GGML_F32xt_LOAD(...) GGML_F32xt_LOAD_IMPL(DEFAULT_PG, __VA_ARGS__) #define GGML_F32xt_STORE_IMPL(pg,a,b) svst1_f32(pg, a, b) #define GGML_F32xt_STORE(...) 
GGML_F32xt_STORE_IMPL(DEFAULT_PG, __VA_ARGS__) -#define GGML_F32xt_FMA_IMPL(pg, a, b, c) svmad_f32_m(pg, a, b, c) +#define GGML_F32xt_FMA_IMPL(pg, a, b, c) svmad_f32_m(pg, b, c, a) #define GGML_F32xt_FMA(...) GGML_F32xt_FMA_IMPL(DEFAULT_PG, __VA_ARGS__) #define GGML_F32xt_ADD_IMPL(pg, a, b) svadd_f32_m(pg, a, b) #define GGML_F32xt_ADD(...) GGML_F32xt_ADD_IMPL(DEFAULT_PG, __VA_ARGS__) @@ -415,7 +572,7 @@ static inline __m256 __avx_f32cx8_load(const ggml_fp16_t * x) { float tmp[8]; for (int i = 0; i < 8; i++) { - tmp[i] = GGML_FP16_TO_FP32(x[i]); + tmp[i] = GGML_CPU_FP16_TO_FP32(x[i]); } return _mm256_loadu_ps(tmp); @@ -426,7 +583,7 @@ static inline void __avx_f32cx8_store(ggml_fp16_t *x, __m256 y) { _mm256_storeu_ps(arr, y); for (int i = 0; i < 8; i++) - x[i] = GGML_FP32_TO_FP16(arr[i]); + x[i] = GGML_CPU_FP32_TO_FP16(arr[i]); } #define GGML_F32Cx8_LOAD(x) __avx_f32cx8_load(x) #define GGML_F32Cx8_STORE(x, y) __avx_f32cx8_store(x, y) @@ -574,10 +731,10 @@ static inline unsigned char ggml_endian_byte(int i) { inline static v128_t __wasm_f16x4_load(const ggml_fp16_t * p) { float tmp[4]; - tmp[0] = GGML_FP16_TO_FP32(p[0]); - tmp[1] = GGML_FP16_TO_FP32(p[1]); - tmp[2] = GGML_FP16_TO_FP32(p[2]); - tmp[3] = GGML_FP16_TO_FP32(p[3]); + tmp[0] = GGML_CPU_FP16_TO_FP32(p[0]); + tmp[1] = GGML_CPU_FP16_TO_FP32(p[1]); + tmp[2] = GGML_CPU_FP16_TO_FP32(p[2]); + tmp[3] = GGML_CPU_FP16_TO_FP32(p[3]); return wasm_v128_load(tmp); } @@ -587,10 +744,10 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) { wasm_v128_store(tmp, x); - p[0] = GGML_FP32_TO_FP16(tmp[0]); - p[1] = GGML_FP32_TO_FP16(tmp[1]); - p[2] = GGML_FP32_TO_FP16(tmp[2]); - p[3] = GGML_FP32_TO_FP16(tmp[3]); + p[0] = GGML_CPU_FP32_TO_FP16(tmp[0]); + p[1] = GGML_CPU_FP32_TO_FP16(tmp[1]); + p[2] = GGML_CPU_FP32_TO_FP16(tmp[2]); + p[3] = GGML_CPU_FP32_TO_FP16(tmp[3]); } #define GGML_F16x4 v128_t @@ -690,10 +847,10 @@ inline static void __wasm_f16x4_store(ggml_fp16_t * p, v128_t x) { static inline __m128 
__sse_f16x4_load(const ggml_fp16_t * x) { float tmp[4]; - tmp[0] = GGML_FP16_TO_FP32(x[0]); - tmp[1] = GGML_FP16_TO_FP32(x[1]); - tmp[2] = GGML_FP16_TO_FP32(x[2]); - tmp[3] = GGML_FP16_TO_FP32(x[3]); + tmp[0] = GGML_CPU_FP16_TO_FP32(x[0]); + tmp[1] = GGML_CPU_FP16_TO_FP32(x[1]); + tmp[2] = GGML_CPU_FP16_TO_FP32(x[2]); + tmp[3] = GGML_CPU_FP16_TO_FP32(x[3]); return _mm_loadu_ps(tmp); } @@ -703,10 +860,10 @@ static inline void __sse_f16x4_store(ggml_fp16_t * x, __m128 y) { _mm_storeu_ps(arr, y); - x[0] = GGML_FP32_TO_FP16(arr[0]); - x[1] = GGML_FP32_TO_FP16(arr[1]); - x[2] = GGML_FP32_TO_FP16(arr[2]); - x[3] = GGML_FP32_TO_FP16(arr[3]); + x[0] = GGML_CPU_FP32_TO_FP16(arr[0]); + x[1] = GGML_CPU_FP32_TO_FP16(arr[1]); + x[2] = GGML_CPU_FP32_TO_FP16(arr[2]); + x[3] = GGML_CPU_FP32_TO_FP16(arr[3]); } #define GGML_F32Cx4 __m128 @@ -828,7 +985,7 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) { #define GGML_F32x4_ZERO __lsx_vldi(0) #define GGML_F32x4_SET1(x) __lsx_vinsgr2vr_w(__lsx_vldi(0),(x), 0) #define GGML_F32x4_LOAD(x) __lsx_vld((x), 0) -#define GGML_F32x4_STORE((x),(y)) __lsx_vst((y), (x), 0) +#define GGML_F32x4_STORE(x, y) __lsx_vst(y, x, 0) #define GGML_F32x4_FMA(a, b, c) __lsx_vfmadd_s(b, c, a) #define GGML_F32x4_ADD __lsx_vfadd_s #define GGML_F32x4_MUL __lsx_vfmul_s @@ -874,10 +1031,10 @@ static inline void __lasx_f32cx8_store(ggml_fp16_t * x, __m256 y) { static inline __m128 __lsx_f16x4_load(const ggml_fp16_t * x) { float tmp[4]; - tmp[0] = GGML_FP16_TO_FP32(x[0]); - tmp[1] = GGML_FP16_TO_FP32(x[1]); - tmp[2] = GGML_FP16_TO_FP32(x[2]); - tmp[3] = GGML_FP16_TO_FP32(x[3]); + tmp[0] = GGML_CPU_FP16_TO_FP32(x[0]); + tmp[1] = GGML_CPU_FP16_TO_FP32(x[1]); + tmp[2] = GGML_CPU_FP16_TO_FP32(x[2]); + tmp[3] = GGML_CPU_FP16_TO_FP32(x[3]); return __lsx_vld(tmp, 0); } @@ -887,10 +1044,10 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) { __lsx_vst(y, arr, 0); - x[0] = GGML_FP32_TO_FP16(arr[0]); - x[1] = GGML_FP32_TO_FP16(arr[1]); - x[2] 
= GGML_FP32_TO_FP16(arr[2]); - x[3] = GGML_FP32_TO_FP16(arr[3]); + x[0] = GGML_CPU_FP32_TO_FP16(arr[0]); + x[1] = GGML_CPU_FP32_TO_FP16(arr[1]); + x[2] = GGML_CPU_FP32_TO_FP16(arr[2]); + x[3] = GGML_CPU_FP32_TO_FP16(arr[3]); } #define GGML_F32Cx4 __m128 @@ -922,7 +1079,7 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) { #define GGML_F32_STEP 32 #define GGML_F32_EPR 4 -#define GGML_F32x4 __vector float +#define GGML_F32x4 float32x4_t #define GGML_F32x4_ZERO vec_splats(0.0f) #define GGML_F32x4_SET1 vec_splats #define GGML_F32x4_LOAD(p) vec_xl(0, p) @@ -944,10 +1101,8 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) { for (int i = 0; i < offset; ++i) { \ x[i] = vec_add(x[i], x[offset + i]); \ } \ - res = vec_extract(x[0], 0) + \ - vec_extract(x[0], 1) + \ - vec_extract(x[0], 2) + \ - vec_extract(x[0], 3); \ + float32x4_t tmp = x[0] + vec_reve(x[0]); \ + res = tmp[0] + tmp[1]; \ } #define GGML_F32_VEC GGML_F32x4 @@ -964,28 +1119,45 @@ static inline void __lsx_f16x4_store(ggml_fp16_t * x, __m128 y) { #define GGML_F16_STEP GGML_F32_STEP #define GGML_F16_EPR GGML_F32_EPR -static inline __vector float __lzs_f16cx4_load(const ggml_fp16_t * x) { +static inline float32x4_t __lzs_f16cx4_load(const ggml_fp16_t * x) { +#if defined(__NNPA__) + uint16x8_t v_x = vec_xl(0, (const ggml_fp16_t *)x); + uint16x8_t v_xd = vec_convert_from_fp16(v_x, 0); + return vec_extend_to_fp32_hi(v_xd, 0); +#else float tmp[4]; for (int i = 0; i < 4; i++) { - tmp[i] = GGML_FP16_TO_FP32(x[i]); + tmp[i] = GGML_CPU_FP16_TO_FP32(x[i]); } // note: keep type-cast here to prevent compiler bugs // see: https://github.com/ggml-org/llama.cpp/issues/12846 return vec_xl(0, (const float *)(tmp)); +#endif } -static inline void __lzs_f16cx4_store(ggml_fp16_t * x, __vector float y) { +static inline void __lzs_f16cx4_store(ggml_fp16_t * x, float32x4_t v_y) { +#if defined(__NNPA__) + float32x4_t v_zero = vec_splats(0.0f); + uint16x8_t v_xd = vec_round_from_fp32(v_y, v_zero, 0); + 
uint16x8_t v_x = vec_convert_to_fp16(v_xd, 0); + + x[0] = vec_extract(v_x, 0); + x[1] = vec_extract(v_x, 1); + x[2] = vec_extract(v_x, 2); + x[3] = vec_extract(v_x, 3); +#else float arr[4]; // note: keep type-cast here to prevent compiler bugs // see: https://github.com/ggml-org/llama.cpp/issues/12846 - vec_xst(y, 0, (float *)(arr)); + vec_xst(v_y, 0, (float *)(arr)); for (int i = 0; i < 4; i++) { - x[i] = GGML_FP32_TO_FP16(arr[i]); + x[i] = GGML_CPU_FP32_TO_FP16(arr[i]); } +#endif } #define GGML_F16_VEC GGML_F32x4 @@ -1006,3 +1178,7 @@ static inline void __lzs_f16cx4_store(ggml_fp16_t * x, __vector float y) { #define GGML_F32_ARR (GGML_F32_STEP/GGML_F32_EPR) #define GGML_F16_ARR (GGML_F16_STEP/GGML_F16_EPR) #endif + +#ifdef __cplusplus +} +#endif diff --git a/ggml/src/ggml-cpu/vec.cpp b/ggml/src/ggml-cpu/vec.cpp index f7614568ea388..07b377bdd82a7 100644 --- a/ggml/src/ggml-cpu/vec.cpp +++ b/ggml/src/ggml-cpu/vec.cpp @@ -37,35 +37,35 @@ void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * G for (int i = 0; i < np; i += ggml_f32_step) { ax1 = GGML_F32_VEC_LOAD(x + i); ay1 = GGML_F32_VEC_LOAD(y + i); - sum1 = GGML_F32_VEC_FMA(ax1, ay1, sum1); + sum1 = GGML_F32_VEC_FMA(sum1, ax1, ay1); ax2 = GGML_F32_VEC_LOAD(x + i + 1*ggml_f32_epr); ay2 = GGML_F32_VEC_LOAD(y + i + 1*ggml_f32_epr); - sum2 = GGML_F32_VEC_FMA(ax2, ay2, sum2); + sum2 = GGML_F32_VEC_FMA(sum2, ax2, ay2); ax3 = GGML_F32_VEC_LOAD(x + i + 2*ggml_f32_epr); ay3 = GGML_F32_VEC_LOAD(y + i + 2*ggml_f32_epr); - sum3 = GGML_F32_VEC_FMA(ax3, ay3, sum3); + sum3 = GGML_F32_VEC_FMA(sum3, ax3, ay3); ax4 = GGML_F32_VEC_LOAD(x + i + 3*ggml_f32_epr); ay4 = GGML_F32_VEC_LOAD(y + i + 3*ggml_f32_epr); - sum4 = GGML_F32_VEC_FMA(ax4, ay4, sum4); + sum4 = GGML_F32_VEC_FMA(sum4, ax4, ay4); ax5 = GGML_F32_VEC_LOAD(x + i + 4*ggml_f32_epr); ay5 = GGML_F32_VEC_LOAD(y + i + 4*ggml_f32_epr); - sum5 = GGML_F32_VEC_FMA(ax5, ay5, sum5); + sum5 = GGML_F32_VEC_FMA(sum5, ax5, ay5); ax6 = GGML_F32_VEC_LOAD(x + i + 
5*ggml_f32_epr); ay6 = GGML_F32_VEC_LOAD(y + i + 5*ggml_f32_epr); - sum6 = GGML_F32_VEC_FMA(ax6, ay6, sum6); + sum6 = GGML_F32_VEC_FMA(sum6, ax6, ay6); ax7 = GGML_F32_VEC_LOAD(x + i + 6*ggml_f32_epr); ay7 = GGML_F32_VEC_LOAD(y + i + 6*ggml_f32_epr); - sum7 = GGML_F32_VEC_FMA(ax7, ay7, sum7); + sum7 = GGML_F32_VEC_FMA(sum7, ax7, ay7); ax8 = GGML_F32_VEC_LOAD(x + i + 7*ggml_f32_epr); ay8 = GGML_F32_VEC_LOAD(y + i + 7*ggml_f32_epr); - sum8 = GGML_F32_VEC_FMA(ax8, ay8, sum8); + sum8 = GGML_F32_VEC_FMA(sum8, ax8, ay8); } // leftovers // Since 8 unrolls are done in above loop, leftovers lie in range [0, ggml_f32_step] which is handled in below loop @@ -73,7 +73,7 @@ void ggml_vec_dot_f32(int n, float * GGML_RESTRICT s, size_t bs, const float * G for (int i = np; i < np2; i += ggml_f32_epr) { ax1 = GGML_F32_VEC_LOAD(x + i); ay1 = GGML_F32_VEC_LOAD(y + i); - sum1 = GGML_F32_VEC_FMA(ax1, ay1, sum1); + sum1 = GGML_F32_VEC_FMA(sum1, ax1, ay1); } // maximum number of leftover elements will be less that ggml_f32_epr. 
Apply predicated svmad on available elements only if (np2 < n) { @@ -219,11 +219,14 @@ void ggml_vec_dot_f16(int n, float * GGML_RESTRICT s, size_t bs, ggml_fp16_t * G // leftovers for (int i = np; i < n; ++i) { - sumf += (ggml_float)(GGML_FP16_TO_FP32(x[i])*GGML_FP16_TO_FP32(y[i])); + sumf += (ggml_float)(GGML_CPU_FP16_TO_FP32(x[i])*GGML_CPU_FP16_TO_FP32(y[i])); } + + // if you hit this, you are likely running outside the FP range + assert(!isnan(sumf) && !isinf(sumf)); #else for (int i = 0; i < n; ++i) { - sumf += (ggml_float)(GGML_FP16_TO_FP32(x[i])*GGML_FP16_TO_FP32(y[i])); + sumf += (ggml_float)(GGML_CPU_FP16_TO_FP32(x[i])*GGML_CPU_FP16_TO_FP32(y[i])); } #endif @@ -254,6 +257,30 @@ void ggml_vec_silu_f32(const int n, float * y, const float * x) { } } +void ggml_vec_swiglu_f32(const int n, float * y, const float * x, const float * g) { + int i = 0; +#if defined(__AVX512F__) && defined(__AVX512DQ__) + for (; i + 15 < n; i += 16) { + _mm512_storeu_ps(y + i, _mm512_mul_ps(ggml_v_silu(_mm512_loadu_ps(x + i)), _mm512_loadu_ps(g + i))); + } +#elif defined(__AVX2__) && defined(__FMA__) + for (; i + 7 < n; i += 8) { + _mm256_storeu_ps(y + i, _mm256_mul_ps(ggml_v_silu(_mm256_loadu_ps(x + i)), _mm256_loadu_ps(g + i))); + } +#elif defined(__SSE2__) + for (; i + 3 < n; i += 4) { + _mm_storeu_ps(y + i, _mm_mul_ps(ggml_v_silu(_mm_loadu_ps(x + i)), _mm_loadu_ps(g + i))); + } +#elif defined(__ARM_NEON) && defined(__aarch64__) + for (; i + 3 < n; i += 4) { + vst1q_f32(y + i, vmulq_f32(ggml_v_silu(vld1q_f32(x + i)), vld1q_f32(g + i))); + } +#endif + for (; i < n; ++i) { + y[i] = ggml_silu_f32(x[i]) * g[i]; + } +} + ggml_float ggml_vec_soft_max_f32(const int n, float * y, const float * x, float max) { int i = 0; ggml_float sum = 0; diff --git a/ggml/src/ggml-cpu/vec.h b/ggml/src/ggml-cpu/vec.h index 09dbade2179fb..d18783a00a1a5 100644 --- a/ggml/src/ggml-cpu/vec.h +++ b/ggml/src/ggml-cpu/vec.h @@ -58,7 +58,7 @@ inline static void ggml_vec_set_bf16(const int n, ggml_bf16_t * x, 
const ggml_bf inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] + y[i]; } inline static void ggml_vec_add_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) { for (int i = 0; i < n; ++i) { - z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) + GGML_FP16_TO_FP32(y[i])); + z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) + GGML_CPU_FP16_TO_FP32(y[i])); } } inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float v) { for (int i = 0; i < n; ++i) z[i] = x[i] + v; } @@ -67,7 +67,7 @@ inline static void ggml_vec_acc1_f32(const int n, float * y, const float v) inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i] - y[i]; } inline static void ggml_vec_sub_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) { for (int i = 0; i < n; ++i) { - z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) - GGML_FP16_TO_FP32(y[i])); + z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) - GGML_CPU_FP16_TO_FP32(y[i])); } } inline static void ggml_vec_set_f32 (const int n, float * x, const float v) { for (int i = 0; i < n; ++i) x[i] = v; } @@ -75,20 +75,20 @@ inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x) inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = -x[i]; } inline static void ggml_vec_neg_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(-GGML_FP16_TO_FP32(x[i])); + y[i] = GGML_CPU_FP32_TO_FP16(-GGML_CPU_FP16_TO_FP32(x[i])); } } inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]*y[i]; } inline static void ggml_vec_mul_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, 
const ggml_fp16_t * y) { for (int i = 0; i < n; ++i) { - z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) * GGML_FP16_TO_FP32(y[i])); + z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) * GGML_CPU_FP16_TO_FP32(y[i])); } } inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i] = x[i]/y[i]; } inline static void ggml_vec_div_f16 (const int n, ggml_fp16_t * z, const ggml_fp16_t * x, const ggml_fp16_t * y) { for (int i = 0; i < n; ++i) { - z[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(x[i]) / GGML_FP16_TO_FP32(y[i])); + z[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(x[i]) / GGML_CPU_FP16_TO_FP32(y[i])); } } @@ -131,13 +131,13 @@ inline static void ggml_vec_dot_f16_unroll(const int n, const int xs, float * GG // leftovers for (int i = np; i < n; ++i) { for (int j = 0; j < GGML_VEC_DOT_UNROLL; ++j) { - sumf[j] += (ggml_float)(GGML_FP16_TO_FP32(x[j][i])*GGML_FP16_TO_FP32(y[i])); + sumf[j] += (ggml_float)(GGML_CPU_FP16_TO_FP32(x[j][i])*GGML_CPU_FP16_TO_FP32(y[i])); } } #else for (int i = 0; i < n; ++i) { for (int j = 0; j < GGML_VEC_DOT_UNROLL; ++j) { - sumf[j] += (ggml_float)(GGML_FP16_TO_FP32(x[j][i])*GGML_FP16_TO_FP32(y[i])); + sumf[j] += (ggml_float)(GGML_CPU_FP16_TO_FP32(x[j][i])*GGML_CPU_FP16_TO_FP32(y[i])); } } #endif @@ -163,49 +163,49 @@ inline static void ggml_vec_mad_f32(const int n, float * GGML_RESTRICT y, const ax1 = GGML_F32_VEC_LOAD(x + i); ay1 = GGML_F32_VEC_LOAD(y + i); - ay1 = GGML_F32_VEC_FMA(ax1, vx, ay1); + ay1 = GGML_F32_VEC_FMA(ay1, ax1, vx); GGML_F32_VEC_STORE(y + i, ay1); ax2 = GGML_F32_VEC_LOAD(x + i + 1*ggml_f32_epr); ay2 = GGML_F32_VEC_LOAD(y + i + 1*ggml_f32_epr); - ay2 = GGML_F32_VEC_FMA(ax2, vx, ay2); + ay2 = GGML_F32_VEC_FMA(ay2, ax2, vx); GGML_F32_VEC_STORE(y + i + 1*ggml_f32_epr, ay2); ax3 = GGML_F32_VEC_LOAD(x + i + 2*ggml_f32_epr); ay3 = GGML_F32_VEC_LOAD(y + i + 2*ggml_f32_epr); - ay3 = GGML_F32_VEC_FMA(ax3, vx, ay3); + ay3 = GGML_F32_VEC_FMA(ay3, ax3, 
vx); GGML_F32_VEC_STORE(y + i + 2*ggml_f32_epr, ay3); ax4 = GGML_F32_VEC_LOAD(x + i + 3*ggml_f32_epr); ay4 = GGML_F32_VEC_LOAD(y + i + 3*ggml_f32_epr); - ay4 = GGML_F32_VEC_FMA(ax4, vx, ay4); + ay4 = GGML_F32_VEC_FMA(ay4, ax4, vx); GGML_F32_VEC_STORE(y + i + 3*ggml_f32_epr, ay4); ax5 = GGML_F32_VEC_LOAD(x + i + 4*ggml_f32_epr); ay5 = GGML_F32_VEC_LOAD(y + i + 4*ggml_f32_epr); - ay5 = GGML_F32_VEC_FMA(ax5, vx, ay5); + ay5 = GGML_F32_VEC_FMA(ay5, ax5, vx); GGML_F32_VEC_STORE(y + i + 4*ggml_f32_epr, ay5); ax6 = GGML_F32_VEC_LOAD(x + i + 5*ggml_f32_epr); ay6 = GGML_F32_VEC_LOAD(y + i + 5*ggml_f32_epr); - ay6 = GGML_F32_VEC_FMA(ax6, vx, ay6); + ay6 = GGML_F32_VEC_FMA(ay6, ax6, vx); GGML_F32_VEC_STORE(y + i + 5*ggml_f32_epr, ay6); ax7 = GGML_F32_VEC_LOAD(x + i + 6*ggml_f32_epr); ay7 = GGML_F32_VEC_LOAD(y + i + 6*ggml_f32_epr); - ay7 = GGML_F32_VEC_FMA(ax7, vx, ay7); + ay7 = GGML_F32_VEC_FMA(ay7, ax7, vx); GGML_F32_VEC_STORE(y + i + 6*ggml_f32_epr, ay7); ax8 = GGML_F32_VEC_LOAD(x + i + 7*ggml_f32_epr); ay8 = GGML_F32_VEC_LOAD(y + i + 7*ggml_f32_epr); - ay8 = GGML_F32_VEC_FMA(ax8, vx, ay8); + ay8 = GGML_F32_VEC_FMA(ay8, ax8, vx); GGML_F32_VEC_STORE(y + i + 7*ggml_f32_epr, ay8); } @@ -215,7 +215,7 @@ inline static void ggml_vec_mad_f32(const int n, float * GGML_RESTRICT y, const for (int i = np; i < np2; i += ggml_f32_epr) { ax1 = GGML_F32_VEC_LOAD(x + i); ay1 = GGML_F32_VEC_LOAD(y + i); - ay1 = GGML_F32_VEC_FMA(ax1, vx, ay1); + ay1 = GGML_F32_VEC_FMA(ay1, ax1, vx); GGML_F32_VEC_STORE(y + i, ay1); } @@ -280,12 +280,12 @@ inline static void ggml_vec_mad_f16(const int n, ggml_fp16_t * GGML_RESTRICT y, // leftovers for (int i = np; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(y[i]) + GGML_FP16_TO_FP32(x[i])*v); + y[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(y[i]) + GGML_CPU_FP16_TO_FP32(x[i])*v); } #else // scalar for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(y[i]) + GGML_FP16_TO_FP32(x[i])*v); + y[i] = 
GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(y[i]) + GGML_CPU_FP16_TO_FP32(x[i])*v); } #endif } @@ -351,6 +351,45 @@ inline static void ggml_vec_mad_f32_unroll(const int n, const int xs, const int #endif } +inline static void ggml_vec_mad1_f32(const int n, float * y, const float * x, const float s, const float b) { +#if defined(GGML_USE_ACCELERATE) + vDSP_vsmsa(x, 1, &s, &b, y, 1, n); +#elif defined(GGML_SIMD) + #if defined(__ARM_FEATURE_SVE) + // scalar ; TODO: Write SVE code + for (int i = 0; i < n; ++i) { + y[i] = x[i]*s + b; + } + #else + const int np = (n & ~(GGML_F32_STEP - 1)); + + GGML_F32_VEC vs = GGML_F32_VEC_SET1(s); + GGML_F32_VEC vb = GGML_F32_VEC_SET1(b); + + GGML_F32_VEC ay[GGML_F32_ARR]; + + for (int i = 0; i < np; i += GGML_F32_STEP) { + for (int j = 0; j < GGML_F32_ARR; j++) { + ay[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR); + ay[j] = GGML_F32_VEC_FMA(ay[j], vs, vb); + + GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]); + } + } + + // leftovers + for (int i = np; i < n; ++i) { + y[i] = x[i]*s + b; + } + #endif +#else + // scalar + for (int i = 0; i < n; ++i) { + y[i] = x[i]*s + b; + } +#endif +} + //inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; } inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { #if defined(GGML_USE_ACCELERATE) @@ -430,12 +469,12 @@ inline static void ggml_vec_scale_f16(const int n, ggml_fp16_t * y, const float // leftovers for (int i = np; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(y[i])*v); + y[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(y[i])*v); } #else // scalar for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(y[i])*v); + y[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(y[i])*v); } #endif } @@ -444,103 +483,103 @@ inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { inline static void ggml_vec_sqr_f32 (const int n, float * y, const float * x) { 
for (int i = 0; i < n; ++i) y[i] = x[i]*x[i]; } inline static void ggml_vec_sqr_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - float v = GGML_FP16_TO_FP32(x[i]); - y[i] = GGML_FP32_TO_FP16(v*v); + float v = GGML_CPU_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16(v*v); } } inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); } inline static void ggml_vec_sqrt_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(sqrtf(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(sqrtf(GGML_CPU_FP16_TO_FP32(x[i]))); } } inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]); } inline static void ggml_vec_log_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(logf(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(logf(GGML_CPU_FP16_TO_FP32(x[i]))); } } inline static void ggml_vec_sin_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sinf(x[i]); } inline static void ggml_vec_sin_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(sinf(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(sinf(GGML_CPU_FP16_TO_FP32(x[i]))); } } inline static void ggml_vec_cos_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = cosf(x[i]); } inline static void ggml_vec_cos_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(cosf(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(cosf(GGML_CPU_FP16_TO_FP32(x[i]))); } } inline static void ggml_vec_abs_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); } inline static void 
ggml_vec_abs_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(fabsf(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(fabsf(GGML_CPU_FP16_TO_FP32(x[i]))); } } inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); } inline static void ggml_vec_sgn_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - float v = GGML_FP16_TO_FP32(x[i]); - y[i] = GGML_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f)); + float v = GGML_CPU_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16((v > 0.f) ? 1.f : ((v < 0.f) ? -1.f : 0.f)); } } inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; } inline static void ggml_vec_step_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16((GGML_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f); + y[i] = GGML_CPU_FP32_TO_FP16((GGML_CPU_FP16_TO_FP32(x[i]) > 0.f) ? 1.f : 0.f); } } inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); } inline static void ggml_vec_tanh_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(tanhf(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(tanhf(GGML_CPU_FP16_TO_FP32(x[i]))); } } inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 
x[i] : expm1f(x[i]); } inline static void ggml_vec_elu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(expm1f(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(expm1f(GGML_CPU_FP16_TO_FP32(x[i]))); } } inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; } inline static void ggml_vec_relu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - float v = GGML_FP16_TO_FP32(x[i]); - y[i] = GGML_FP32_TO_FP16((v > 0.f) ? v : 0.f); + float v = GGML_CPU_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16((v > 0.f) ? v : 0.f); } } inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); } inline static void ggml_vec_leaky_relu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const float ns) { for (int i = 0; i < n; ++i) { - float v = GGML_FP16_TO_FP32(x[i]); - y[i] = GGML_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? v : 0.f)); + float v = GGML_CPU_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16(((v > 0.f) ? v : 0.f) + ns * ((v < 0.0f) ? 
v : 0.f)); } } inline static void ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); } inline static void ggml_vec_sigmoid_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(1.f / (1.f + expf(-GGML_FP16_TO_FP32(x[i])))); + y[i] = GGML_CPU_FP32_TO_FP16(1.f / (1.f + expf(-GGML_CPU_FP16_TO_FP32(x[i])))); } } // TODO: optimize performance inline static void ggml_vec_hardswish_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i] * fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); } inline static void ggml_vec_hardswish_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - float v = GGML_FP16_TO_FP32(x[i]); - y[i] = GGML_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f))); + float v = GGML_CPU_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16(v * fminf(1.0f, fmaxf(0.0f, (v + 3.0f) / 6.0f))); } } inline static void ggml_vec_hardsigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fminf(1.0f, fmaxf(0.0f, (x[i] + 3.0f) / 6.0f)); } inline static void ggml_vec_hardsigmoid_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (GGML_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f))); + y[i] = GGML_CPU_FP32_TO_FP16(fminf(1.0f, fmaxf(0.0f, (GGML_CPU_FP16_TO_FP32(x[i]) + 3.0f) / 6.0f))); } } inline static void ggml_vec_exp_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = expf(x[i]); } inline static void ggml_vec_exp_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - y[i] = GGML_FP32_TO_FP16(expf(GGML_FP16_TO_FP32(x[i]))); + y[i] = GGML_CPU_FP32_TO_FP16(expf(GGML_CPU_FP16_TO_FP32(x[i]))); } } @@ -562,9 +601,9 @@ inline static void ggml_vec_gelu_f16(const int n, ggml_fp16_t * y, 
const ggml_fp inline static void ggml_vec_gelu_erf_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - float xi = GGML_FP16_TO_FP32(x[i]); + float xi = GGML_CPU_FP16_TO_FP32(x[i]); float res = 0.5f*xi*(1.0f + erff(xi*SQRT_2_INV)); - y[i] = GGML_FP32_TO_FP16(res); + y[i] = GGML_CPU_FP32_TO_FP16(res); } } @@ -577,9 +616,9 @@ inline static void ggml_vec_gelu_f32(const int n, float * y, const float * x) { } else if (x[i] >= 10.0f) { y[i] = x[i]; } else { - ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]); + ggml_fp16_t fp16 = GGML_CPU_FP32_TO_FP16(x[i]); memcpy(&t, &fp16, sizeof(uint16_t)); - y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_f16[t]); + y[i] = GGML_CPU_FP16_TO_FP32(ggml_table_gelu_f16[t]); } } } @@ -613,9 +652,9 @@ inline static float ggml_gelu_quick_f32(float x) { inline static void ggml_vec_gelu_quick_f32(const int n, float * y, const float * x) { uint16_t t; for (int i = 0; i < n; ++i) { - ggml_fp16_t fp16 = GGML_FP32_TO_FP16(x[i]); + ggml_fp16_t fp16 = GGML_CPU_FP32_TO_FP16(x[i]); memcpy(&t, &fp16, sizeof(uint16_t)); - y[i] = GGML_FP16_TO_FP32(ggml_table_gelu_quick_f16[t]); + y[i] = GGML_CPU_FP16_TO_FP32(ggml_table_gelu_quick_f16[t]); } } #else @@ -628,8 +667,8 @@ inline static void ggml_vec_gelu_quick_f32(const int n, float * y, const float * inline static void ggml_vec_gelu_quick_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { for (int i = 0; i < n; ++i) { - float v = GGML_FP16_TO_FP32(x[i]); - y[i] = GGML_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v)))); + float v = GGML_CPU_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16(v*(1.0f/(1.0f+expf(GELU_QUICK_COEF*v)))); } } @@ -638,8 +677,8 @@ inline static float ggml_silu_f32(float x) { return x/(1.0f + expf(-x)); } inline static ggml_fp16_t ggml_silu_f16(ggml_fp16_t x) { - float v = GGML_FP16_TO_FP32(x); - return GGML_FP32_TO_FP16(v/(1.0f + expf(-v))); + float v = GGML_CPU_FP16_TO_FP32(x); + return GGML_CPU_FP32_TO_FP16(v/(1.0f + expf(-v))); } #if 
__FINITE_MATH_ONLY__ @@ -888,9 +927,9 @@ inline static float ggml_silu_backward_f32(float x, float dy) { } inline static ggml_fp16_t ggml_silu_backward_f16(ggml_fp16_t x, ggml_fp16_t dy) { - const float v = GGML_FP16_TO_FP32(x); + const float v = GGML_CPU_FP16_TO_FP32(x); const float s = 1.0f/(1.0f + expf(-v)); - return GGML_FP32_TO_FP16(GGML_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s))); + return GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(dy)*s*(1.0f + v*(1.0f - s))); } inline static void ggml_vec_silu_backward_f32(const int n, float * dx, const float * x, const float * dy) { @@ -905,6 +944,100 @@ inline static void ggml_vec_silu_backward_f16(const int n, ggml_fp16_t * dx, con } } +inline static void ggml_vec_reglu_f32 (const int n, float * y, const float * x, const float * g) { + for (int i = 0; i < n; ++i) { + y[i] = (x[i] > 0.f) ? x[i] * g[i] : 0.f; + } +} + +inline static void ggml_vec_reglu_f16 (const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) { + for (int i = 0; i < n; ++i) { + float v = GGML_CPU_FP16_TO_FP32(x[i]); + y[i] = GGML_CPU_FP32_TO_FP16((v > 0.f) ? 
v * GGML_CPU_FP16_TO_FP32(g[i]) : 0.f); + } +} + +#ifdef GGML_GELU_FP16 +inline static void ggml_vec_geglu_f32(const int n, float * y, const float * x, const float * g) { + uint16_t t; + for (int i = 0; i < n; ++i) { + if (x[i] <= -10.0f) { + y[i] = 0.0f; + } else if (x[i] >= 10.0f) { + y[i] = x[i] * g[i]; + } else { + ggml_fp16_t fp16 = GGML_CPU_FP32_TO_FP16(x[i]); + memcpy(&t, &fp16, sizeof(uint16_t)); + y[i] = GGML_CPU_FP16_TO_FP32(ggml_table_gelu_f16[t]) * g[i]; + } + } +} +#else +inline static void ggml_vec_geglu_f32(const int n, float * y, const float * x, const float * g) { + for (int i = 0; i < n; ++i) { + y[i] = ggml_gelu_f32(x[i]) * g[i]; + } +} +#endif + +inline static void ggml_vec_geglu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) { + const uint16_t * i16 = (const uint16_t *) x; + for (int i = 0; i < n; ++i) { + float v = GGML_CPU_FP16_TO_FP32(g[i]); + y[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(ggml_table_gelu_f16[i16[i]]) * v); + } +} + +void ggml_vec_swiglu_f32(const int n, float * y, const float * x, const float * g); + +inline static void ggml_vec_swiglu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) { + for (int i = 0; i < n; ++i) { + float v = GGML_CPU_FP16_TO_FP32(x[i]); + float w = GGML_CPU_FP16_TO_FP32(g[i]); + y[i] = GGML_CPU_FP32_TO_FP16((v/(1.0f + expf(-v))) * w); + } +} + +inline static void ggml_vec_geglu_erf_f32(const int n, float * y, const float * x, const float * g) { + for (int i = 0; i < n; ++i) { + float xi = x[i]; + y[i] = 0.5f * xi * (1.0f + erff(xi*SQRT_2_INV)) * g[i]; + } +} + +inline static void ggml_vec_geglu_erf_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) { + for (int i = 0; i < n; ++i) { + float xi = GGML_CPU_FP16_TO_FP32(x[i]); + float gi = GGML_CPU_FP16_TO_FP32(g[i]); + y[i] = GGML_CPU_FP32_TO_FP16(0.5f * xi * (1.0f + erff(xi*SQRT_2_INV)) * gi); + } +} + +#ifdef GGML_GELU_QUICK_FP16 +inline static void 
ggml_vec_geglu_quick_f32(const int n, float * y, const float * x, const float * g) { + uint16_t t; + for (int i = 0; i < n; ++i) { + ggml_fp16_t fp16 = GGML_CPU_FP32_TO_FP16(x[i]); + memcpy(&t, &fp16, sizeof(uint16_t)); + y[i] = GGML_CPU_FP16_TO_FP32(ggml_table_gelu_quick_f16[t]) * g[i]; + } +} +#else +inline static void ggml_vec_geglu_quick_f32(const int n, float * y, const float * x, const float * g) { + for (int i = 0; i < n; ++i) { + y[i] = ggml_gelu_quick_f32(x[i]) * g[i]; + } +} +#endif + +inline static void ggml_vec_geglu_quick_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x, const ggml_fp16_t * g) { + const uint16_t * i16 = (const uint16_t *) x; + for (int i = 0; i < n; ++i) { + float v = GGML_CPU_FP16_TO_FP32(g[i]); + y[i] = GGML_CPU_FP32_TO_FP16(GGML_CPU_FP16_TO_FP32(ggml_table_gelu_quick_f16[i16[i]]) * v); + } +} + inline static void ggml_vec_sum_f32(const int n, float * s, const float * x) { #ifndef GGML_USE_ACCELERATE ggml_float sum = 0.0; @@ -928,7 +1061,7 @@ inline static void ggml_vec_sum_f32_ggf(const int n, ggml_float * s, const float inline static void ggml_vec_sum_f16_ggf(const int n, float * s, const ggml_fp16_t * x) { float sum = 0.0f; for (int i = 0; i < n; ++i) { - sum += GGML_FP16_TO_FP32(x[i]); + sum += GGML_CPU_FP16_TO_FP32(x[i]); } *s = sum; } diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index a82ec26ee1a2d..1a2708ec9dff5 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -19,10 +19,10 @@ #endif #include "ggml-common.h" -#include #include #include #include +#include #include #include @@ -76,11 +76,9 @@ #define GGML_CUDA_CC_IS_CDNA(cc) (cc >= GGML_CUDA_CC_CDNA && cc < GGML_CUDA_CC_RDNA1) // Moore Threads -#define GGML_CUDA_MUSA_ARCH_IS_QY1 (__MUSA_ARCH__ <= 210) - -#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000 -#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000 -#define GGML_CUDA_CC_NG 
(GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // TBD +#define GGML_CUDA_CC_QY1 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x210) // MTT S80, MTT S3000 +#define GGML_CUDA_CC_QY2 (GGML_CUDA_CC_OFFSET_MTHREADS + 0x220) // MTT S4000 +#define GGML_CUDA_CC_NG (GGML_CUDA_CC_OFFSET_MTHREADS + 0x310) // TBD #define GGML_CUDA_CC_IS_MTHREADS(cc) (cc >= GGML_CUDA_CC_OFFSET_MTHREADS && cc < GGML_CUDA_CC_OFFSET_AMD) #define GGML_CUDA_CC_IS_QY1(cc) (cc >= GGML_CUDA_CC_QY1 && cc < GGML_CUDA_CC_QY2) @@ -177,6 +175,23 @@ static const char * cu_get_error_str(CUresult err) { #define CU_CHECK(err) CUDA_CHECK_GEN(err, CUDA_SUCCESS, cu_get_error_str) #endif +#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) +# define CUDA_SET_SHARED_MEMORY_LIMIT(kernel, nbytes) \ + do { \ + static bool shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = { false }; \ + const int id = ggml_cuda_get_device(); \ + if (!shared_memory_limit_raised[id]) { \ + CUDA_CHECK(cudaFuncSetAttribute(kernel, cudaFuncAttributeMaxDynamicSharedMemorySize, nbytes)); \ + shared_memory_limit_raised[id] = true; \ + } \ + } while (0) +#else +# define CUDA_SET_SHARED_MEMORY_LIMIT(kernel, nbytes) \ + do { \ + GGML_UNUSED(nbytes); \ + } while (0) +#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) + #if CUDART_VERSION >= 11010 || defined(GGML_USE_MUSA) #define GGML_CUDA_ASSUME(x) __builtin_assume(x) #else @@ -203,13 +218,13 @@ typedef float2 dfloat2; #define FAST_FP16_AVAILABLE #endif // defined(FP16_AVAILABLE) && __CUDA_ARCH__ != 610 -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA +#if (!defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA) || defined(GGML_USE_MUSA) #define FP16_MMA_AVAILABLE -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA +#endif // (!defined(GGML_USE_HIP) && __CUDA_ARCH__ >= GGML_CUDA_CC_VOLTA) || defined(GGML_USE_MUSA) -#if 
defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4)) +#if defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4))) #define FP16_MMA_AVAILABLE -#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || defined(RDNA4)) +#endif // defined(GGML_HIP_ROCWMMA_FATTN) && (defined(CDNA) || defined(RDNA3) || (defined(GGML_HIP_ROCWMMA_FATTN_GFX12) && defined(RDNA4))) #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_TURING #define NEW_MMA_AVAILABLE @@ -219,9 +234,9 @@ typedef float2 dfloat2; #define CP_ASYNC_AVAILABLE #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && __CUDA_ARCH__ >= GGML_CUDA_CC_AMPERE -#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_MUSA_ARCH_IS_QY1) +#if !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < 220) #define FLASH_ATTN_AVAILABLE -#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && GGML_CUDA_MUSA_ARCH_IS_QY1) +#endif // !defined(GGML_CUDA_NO_FA) && !(defined(GGML_USE_MUSA) && __MUSA_ARCH__ < 220) static bool fp16_available(const int cc) { return ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_PASCAL; @@ -233,7 +248,8 @@ static bool fast_fp16_available(const int cc) { // To be used for feature selection of external libraries, e.g. cuBLAS. static bool fast_fp16_hardware_available(const int cc) { - return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_PASCAL && cc != 610) || GGML_CUDA_CC_IS_AMD(cc); + return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_PASCAL && cc != 610) || GGML_CUDA_CC_IS_AMD(cc) || + (GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_QY2); } // Any FP16 tensor core instructions are available for ggml code. 
@@ -241,15 +257,35 @@ static bool fp16_mma_available(const int cc) { #if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN) return false; #else - return (GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA) || - GGML_CUDA_CC_IS_CDNA(cc) || GGML_CUDA_CC_IS_RDNA3(cc) || GGML_CUDA_CC_IS_RDNA4(cc); + if ((GGML_CUDA_CC_IS_NVIDIA(cc) && ggml_cuda_highest_compiled_arch(cc) >= GGML_CUDA_CC_VOLTA) || + GGML_CUDA_CC_IS_CDNA(cc) || GGML_CUDA_CC_IS_RDNA3(cc) || + GGML_CUDA_CC_IS_MTHREADS(cc)) { + return true; + } else if (GGML_CUDA_CC_IS_RDNA4(cc)) { +#if defined(GGML_HIP_ROCWMMA_FATTN) && defined(GGML_HIP_ROCWMMA_FATTN_GFX12) + return true; +#else + return false; +#endif // defined(GGML_HIP_ROCWMMA_FATTN) && defined(GGML_HIP_ROCWMMA_FATTN_GFX12) + } else { + return false; + } #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && !defined(GGML_HIP_ROCWMMA_FATTN) } // To be used for feature selection of external libraries, e.g. cuBLAS. static bool fp16_mma_hardware_available(const int cc) { return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_VOLTA) || - GGML_CUDA_CC_IS_CDNA(cc) || GGML_CUDA_CC_IS_RDNA3(cc) || GGML_CUDA_CC_IS_RDNA4(cc); + GGML_CUDA_CC_IS_CDNA(cc) || GGML_CUDA_CC_IS_RDNA3(cc) || GGML_CUDA_CC_IS_RDNA4(cc) || + (GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_QY2); +} + +static bool bf16_mma_hardware_available(const int cc) { + return (GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_AMPERE) || GGML_CUDA_CC_IS_CDNA(cc) || cc >= GGML_CUDA_CC_RDNA3; +} + +static bool fp32_mma_hardware_available(const int cc) { + return GGML_CUDA_CC_IS_CDNA(cc); } // Volta technically had FP16 tensor cores but they work very differently compared to Turing and later. 
@@ -262,11 +298,11 @@ static bool cp_async_available(const int cc) { } static constexpr __device__ int ggml_cuda_get_physical_warp_size() { -#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) - return __AMDGCN_WAVEFRONT_SIZE; +#if defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && (defined(__GFX9__) || defined(__GFX8__)) + return 64; #else return 32; -#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) +#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__) && (defined(__GFX9__) || defined(__GFX8__)) } [[noreturn]] @@ -362,6 +398,26 @@ static __device__ __forceinline__ half2 warp_reduce_sum(half2 a) { #endif // FP16_AVAILABLE } +// Row reduction kernel template - compute sum (norm=false) or mean (norm=true) +template +static __global__ void reduce_rows_f32(const float * x, float * dst, const int ncols) { + const int row = blockIdx.x; + const int col = threadIdx.x; + + float sum = 0.0f; + for (int i = col; i < ncols; i += blockDim.x) { + sum += x[row * ncols + i]; + } + + sum = warp_reduce_sum(sum); + + if (col != 0) { + return; + } + + dst[row] = norm ? 
sum / ncols : sum; +} + template static __device__ __forceinline__ float warp_reduce_max(float x) { #pragma unroll @@ -767,21 +823,7 @@ struct ggml_backend_cuda_context { name(GGML_CUDA_NAME + std::to_string(device)) { } - ~ggml_backend_cuda_context() { - if (copy_event != nullptr) { - CUDA_CHECK(cudaEventDestroy(copy_event)); - } - for (int i = 0; i < GGML_CUDA_MAX_DEVICES; ++i) { - for (int j = 0; j < GGML_CUDA_MAX_STREAMS; ++j) { - if (streams[i][j] != nullptr) { - CUDA_CHECK(cudaStreamDestroy(streams[i][j])); - } - } - if (cublas_handles[i] != nullptr) { - CUBLAS_CHECK(cublasDestroy(cublas_handles[i])); - } - } - } + ~ggml_backend_cuda_context(); cudaStream_t stream(int device, int stream) { if (streams[device][stream] == nullptr) { diff --git a/ggml/src/ggml-cuda/conv2d-dw.cu b/ggml/src/ggml-cuda/conv2d-dw.cu new file mode 100644 index 0000000000000..7583233b1b7cd --- /dev/null +++ b/ggml/src/ggml-cuda/conv2d-dw.cu @@ -0,0 +1,161 @@ +#include "conv2d-dw.cuh" + +struct conv_params { + int in_w, in_h; + int out_w, out_h; + int kernel_w, kernel_h; + int stride_x, stride_y; + int padding_x, padding_y; + int dilation_x, dilation_y; + int channels, batches; +}; + +struct kernel_bounds { + int y_min, y_max; + int x_min, x_max; +}; + +__device__ __forceinline__ kernel_bounds calculate_kernel_bounds(int out_x, int out_y, const conv_params & params) { + kernel_bounds bounds; + bounds.y_min = max(0, (params.padding_y - out_y * params.stride_y + params.dilation_y - 1) / params.dilation_y); + bounds.y_max = + min(params.kernel_h, + (params.in_h + params.padding_y - out_y * params.stride_y + params.dilation_y - 1) / params.dilation_y); + bounds.x_min = max(0, (params.padding_x - out_x * params.stride_x + params.dilation_x - 1) / params.dilation_x); + bounds.x_max = + min(params.kernel_w, + (params.in_w + params.padding_x - out_x * params.stride_x + params.dilation_x - 1) / params.dilation_x); + return bounds; +} + +__device__ __forceinline__ int calculate_input_coord(int 
out_coord, int kern_coord, int stride, int dilation, int padding) { + return out_coord * stride + kern_coord * dilation - padding; +} + +struct whcn_layout { + __device__ static int input_index(int n, int c, int y, int x, const conv_params & params) { + return n * (params.channels * params.in_w * params.in_h) + c * params.in_w * params.in_h + y * params.in_w + x; + } + + __device__ static int kernel_index(int c, int ky, int kx, const conv_params & params) { + return c * params.kernel_h * params.kernel_w + ky * params.kernel_w + kx; + } + + __device__ static int output_index(int n, int c, int y, int x, const conv_params & params) { + return n * (params.channels * params.out_w * params.out_h) + c * params.out_w * params.out_h + + y * params.out_w + x; + } + + __device__ static void unpack_indices(int global_idx, const conv_params & params, int & n, int & c, int & out_y, + int & out_x) { + out_x = global_idx % params.out_w; + out_y = (global_idx / params.out_w) % params.out_h; + c = (global_idx / (params.out_w * params.out_h)) % params.channels; + n = global_idx / (params.out_w * params.out_h * params.channels); + } +}; + +struct cwhn_layout { + __device__ static int input_index(int n, int c, int y, int x, const conv_params & params) { + return n * (params.channels * params.in_w * params.in_h) + (y * params.in_w + x) * params.channels + c; + } + + __device__ static int kernel_index(int c, int ky, int kx, const conv_params & params) { + return (ky * params.kernel_w + kx) * params.channels + c; + } + + __device__ static int output_index(int n, int c, int y, int x, const conv_params & params) { + return n * (params.channels * params.out_w * params.out_h) + y * (params.out_w * params.channels) + + x * params.channels + c; + } + + __device__ static void unpack_indices(int global_idx, const conv_params & params, int & n, int & c, int & out_y, + int & out_x) { + c = global_idx % params.channels; + out_x = (global_idx / params.channels) % params.out_w; + out_y = (global_idx / 
(params.channels * params.out_w)) % params.out_h; + n = global_idx / (params.channels * params.out_w * params.out_h); + } +}; + +template +__global__ void conv2d_dw_kernel(const T * __restrict__ input, const T * __restrict__ kernel, T * __restrict__ output, + const int in_w, const int in_h, const int out_w, const int out_h, + const int kernel_w, const int kernel_h, const int stride_x, const int stride_y, + const int padding_x, const int padding_y, const int dilation_x, const int dilation_y, + const int channels, const int batches) { + const int global_idx = blockIdx.x * blockDim.x + threadIdx.x; + const int total_elements = batches * channels * out_h * out_w; + + if (global_idx >= total_elements) { + return; + } + + conv_params params = { in_w, in_h, out_w, out_h, kernel_w, kernel_h, stride_x, + stride_y, padding_x, padding_y, dilation_x, dilation_y, channels, batches }; + + int batch_idx, channel_idx, out_y_idx, out_x_idx; + Layout::unpack_indices(global_idx, params, batch_idx, channel_idx, out_y_idx, out_x_idx); + + T accumulator = 0; + kernel_bounds bounds = calculate_kernel_bounds(out_x_idx, out_y_idx, params); + + for (int kern_y = bounds.y_min; kern_y < bounds.y_max; ++kern_y) { + int in_y_idx = calculate_input_coord(out_y_idx, kern_y, params.stride_y, params.dilation_y, params.padding_y); + + for (int kern_x = bounds.x_min; kern_x < bounds.x_max; ++kern_x) { + int in_x_idx = calculate_input_coord(out_x_idx, kern_x, params.stride_x, params.dilation_x, params.padding_x); + + const T input_val = input[Layout::input_index(batch_idx, channel_idx, in_y_idx, in_x_idx, params)]; + const T kernel_val = kernel[Layout::kernel_index(channel_idx, kern_y, kern_x, params)]; + + accumulator += input_val * kernel_val; + } + } + + output[Layout::output_index(batch_idx, channel_idx, out_y_idx, out_x_idx, params)] = accumulator; +} + +void ggml_cuda_op_conv2d_dw(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * kernel = dst->src[0]; + const ggml_tensor 
* input = dst->src[1]; + + GGML_ASSERT(kernel->type == GGML_TYPE_F32 && input->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + const float * w_d = (const float *) kernel->data; + const float * x_d = (const float *) input->data; + float * y_d = (float *) dst->data; + + const int32_t * p = (const int32_t *) dst->op_params; + const int stride_x = p[0]; + const int stride_y = p[1]; + const int padding_x = p[2]; + const int padding_y = p[3]; + const int dilation_x = p[4]; + const int dilation_y = p[5]; + + const int in_w = input->ne[0]; + const int in_h = input->ne[1]; + const int kernel_w = kernel->ne[0]; + const int kernel_h = kernel->ne[1]; + const int out_w = dst->ne[0]; + const int out_h = dst->ne[1]; + const int channels = dst->ne[2]; + const int batches = dst->ne[3]; + + cudaStream_t st = ctx.stream(); + + const int total = batches * channels * out_h * out_w; + const int blocks = (total + CUDA_CONV2D_DW_BLOCK_SIZE - 1) / CUDA_CONV2D_DW_BLOCK_SIZE; + + if (ggml_is_contiguous(input)) { + conv2d_dw_kernel<<>>( + x_d, w_d, y_d, in_w, in_h, out_w, out_h, kernel_w, kernel_h, stride_x, stride_y, padding_x, padding_y, + dilation_x, dilation_y, channels, batches); + } else if (ggml_is_contiguous_channels(input)) { + conv2d_dw_kernel<<>>( + x_d, w_d, y_d, in_w, in_h, out_w, out_h, kernel_w, kernel_h, stride_x, stride_y, padding_x, padding_y, + dilation_x, dilation_y, channels, batches); + } else { + GGML_ABORT("Unsupported memory layout for conv_2d_dw"); + } +} diff --git a/ggml/src/ggml-cuda/conv2d-dw.cuh b/ggml/src/ggml-cuda/conv2d-dw.cuh new file mode 100644 index 0000000000000..b5d5a69d345cf --- /dev/null +++ b/ggml/src/ggml-cuda/conv2d-dw.cuh @@ -0,0 +1,5 @@ +#pragma once +#include "common.cuh" + +#define CUDA_CONV2D_DW_BLOCK_SIZE 256 +void ggml_cuda_op_conv2d_dw(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/conv2d-transpose.cu b/ggml/src/ggml-cuda/conv2d-transpose.cu new file mode 100644 index 
0000000000000..03224e404d32d --- /dev/null +++ b/ggml/src/ggml-cuda/conv2d-transpose.cu @@ -0,0 +1,91 @@ +#include + +#include "conv2d-transpose.cuh" +#include "ggml.h" + +__global__ void conv2d_transpose_kernel(const float * __restrict__ input, const half * __restrict__ kernel, + float * __restrict__ output, const int in_w, const int in_h, const int out_w, + const int out_h, const int kernel_w, const int kernel_h, const int stride, + const int c_in, const int c_out, const int batches) { + const int global_idx = blockIdx.x * blockDim.x + threadIdx.x; + + const int total_elements = out_w * out_h * c_out * batches; + + if (global_idx >= total_elements) { + return; + } + + const int out_x_idx = global_idx % out_w; + const int out_y_idx = (global_idx / out_w) % out_h; + const int c_idx = (global_idx / (out_w * out_h)) % c_out; + const int n_idx = global_idx / (out_w * out_h * c_out); + + float accumulator = 0; + // For each output idx, find the inputs that contribute to it by checking stride alignment and bounds + + for (int c_in_idx = 0; c_in_idx < c_in; c_in_idx++) { + for (int kh = 0; kh < kernel_h; ++kh) { + int in_y = out_y_idx - kh; + if (in_y < 0 || in_y % stride) continue; + in_y /= stride; + if (in_y >= in_h) continue; + + for (int kw = 0; kw < kernel_w; ++kw) { + int in_x = out_x_idx - kw; + if (in_x < 0 || in_x % stride) continue; + in_x /= stride; + if (in_x >= in_w) continue; + + const int input_idx = (in_w * in_h * c_in) * n_idx + (in_w * in_h) * c_in_idx + (in_w) *in_y + in_x; + const int kernel_idx = + (kernel_h * kernel_w * c_out) * c_in_idx + (kernel_h * kernel_w) * c_idx + (kernel_w) *kh + kw; + + float input_val = input[input_idx]; + half kern_val = kernel[kernel_idx]; + + accumulator += input_val * (float) kern_val; + } + } + } + + output[(out_w * out_h * c_out) * n_idx + (out_w * out_h) * c_idx + (out_w) *out_y_idx + out_x_idx] = accumulator; +} + +//input is (W, H, C_in, N), Kernel is (W, H, C_out, C_in) +void 
ggml_cuda_conv_2d_transpose_p0(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * kernel = dst->src[0]; + const ggml_tensor * input = dst->src[1]; + + GGML_ASSERT(kernel->type == GGML_TYPE_F16 && input->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32); + + const float * input_data = (const float *) input->data; + float * output_data = (float *) dst->data; + const half * kernel_data = (const half *) kernel->data; + + const int input_w = input->ne[0]; + const int input_h = input->ne[1]; + const int output_w = dst->ne[0]; + const int output_h = dst->ne[1]; + const int channels_in = input->ne[2]; + const int channels_out = kernel->ne[2]; + const int kernel_w = kernel->ne[0]; + const int kernel_h = kernel->ne[1]; + const int stride = dst->op_params[0]; + const int batches = input->ne[3]; + + GGML_ASSERT(channels_in == kernel->ne[3]); + GGML_ASSERT(stride > 0); + + cudaStream_t st = ctx.stream(); + + GGML_ASSERT(ggml_is_contiguous(input)); + GGML_ASSERT(ggml_is_contiguous(kernel)); + GGML_ASSERT(ggml_is_contiguous(dst)); + + const int total = (output_w * output_h * channels_out * batches); + const int blocks = (total + CUDA_CONV2D_TRANSPOSE_BLOCK_SIZE - 1) / CUDA_CONV2D_TRANSPOSE_BLOCK_SIZE; + + conv2d_transpose_kernel<<>>( + input_data, kernel_data, output_data, input_w, input_h, output_w, output_h, kernel_w, kernel_h, stride, + channels_in, channels_out, batches); +} diff --git a/ggml/src/ggml-cuda/conv2d-transpose.cuh b/ggml/src/ggml-cuda/conv2d-transpose.cuh new file mode 100644 index 0000000000000..c9430b2485021 --- /dev/null +++ b/ggml/src/ggml-cuda/conv2d-transpose.cuh @@ -0,0 +1,4 @@ +#include "common.cuh" + +#define CUDA_CONV2D_TRANSPOSE_BLOCK_SIZE 256 +void ggml_cuda_conv_2d_transpose_p0(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml/src/ggml-cuda/convert.cu index c6dec4276b36d..eeaa14bf57950 100644 --- a/ggml/src/ggml-cuda/convert.cu +++ b/ggml/src/ggml-cuda/convert.cu @@ 
-728,3 +728,25 @@ to_fp16_nc_cuda_t ggml_get_to_fp16_nc_cuda(ggml_type type) { return nullptr; } } + +to_bf16_nc_cuda_t ggml_get_to_bf16_nc_cuda(ggml_type type) { + switch (type) { + case GGML_TYPE_F32: + return convert_unary_cuda; + case GGML_TYPE_F16: + return convert_unary_cuda; + default: + return nullptr; + } +} + +to_fp32_nc_cuda_t ggml_get_to_fp32_nc_cuda(ggml_type type) { + switch (type) { + case GGML_TYPE_F16: + return convert_unary_cuda; + case GGML_TYPE_BF16: + return convert_unary_cuda; + default: + return nullptr; + } +} diff --git a/ggml/src/ggml-cuda/convert.cuh b/ggml/src/ggml-cuda/convert.cuh index b65b98e08e7e2..f04214be175ba 100644 --- a/ggml/src/ggml-cuda/convert.cuh +++ b/ggml/src/ggml-cuda/convert.cuh @@ -22,5 +22,10 @@ using to_t_nc_cuda_t = void (*)(const void * x, T * y, int64_t ne00, int64_t ne01, int64_t ne02, int64_t ne03, int64_t s01, int64_t s02, int64_t s03, cudaStream_t stream); +typedef to_t_nc_cuda_t to_fp32_nc_cuda_t; typedef to_t_nc_cuda_t to_fp16_nc_cuda_t; +typedef to_t_nc_cuda_t to_bf16_nc_cuda_t; + +to_fp32_nc_cuda_t ggml_get_to_fp32_nc_cuda(ggml_type type); to_fp16_nc_cuda_t ggml_get_to_fp16_nc_cuda(ggml_type type); +to_bf16_nc_cuda_t ggml_get_to_bf16_nc_cuda(ggml_type type); diff --git a/ggml/src/ggml-cuda/cross-entropy-loss.cu b/ggml/src/ggml-cuda/cross-entropy-loss.cu index 0ce4afbb222bd..0c8b0819724e4 100644 --- a/ggml/src/ggml-cuda/cross-entropy-loss.cu +++ b/ggml/src/ggml-cuda/cross-entropy-loss.cu @@ -123,13 +123,7 @@ void ggml_cuda_cross_entropy_loss(ggml_backend_cuda_context & ctx, ggml_tensor * ggml_cuda_pool_alloc dst_tmp(pool, blocks_num.x); if (nbytes_shared <= smpbo) { -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) - static bool shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = {false}; - if (!shared_memory_limit_raised[id]) { - CUDA_CHECK(cudaFuncSetAttribute(cross_entropy_loss_f32, cudaFuncAttributeMaxDynamicSharedMemorySize, smpbo)); - 
shared_memory_limit_raised[id] = true; - } -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) + CUDA_SET_SHARED_MEMORY_LIMIT((cross_entropy_loss_f32), smpbo); cross_entropy_loss_f32<<>>(src0_d, src1_d, dst_tmp.ptr, ne00, nrows); } else { cross_entropy_loss_f32<<>>(src0_d, src1_d, dst_tmp.ptr, ne00, nrows); @@ -175,13 +169,7 @@ void ggml_cuda_cross_entropy_loss_back(ggml_backend_cuda_context & ctx, ggml_ten const size_t smpbo = ggml_cuda_info().devices[id].smpbo; if (nbytes_shared <= smpbo) { -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) - static bool shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = {false}; - if (!shared_memory_limit_raised[id]) { - CUDA_CHECK(cudaFuncSetAttribute(cross_entropy_loss_back_f32, cudaFuncAttributeMaxDynamicSharedMemorySize, smpbo)); - shared_memory_limit_raised[id] = true; - } -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) + CUDA_SET_SHARED_MEMORY_LIMIT((cross_entropy_loss_back_f32), smpbo); cross_entropy_loss_back_f32<<>>(grad_d, src0f_d, src1f_d, dst_d, ne00); } else { cross_entropy_loss_back_f32<<>>(grad_d, src0f_d, src1f_d, dst_d, ne00); diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index cfab2b5ebaccc..9122fca6cf99f 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -32,7 +32,11 @@ typedef void (* fattn_kernel_t)( const int ne12, const int ne13, const int ne31, + const int ne32, + const int ne33, const int nb31, + const int nb32, + const int nb33, const int nb01, const int nb02, const int nb03, @@ -519,7 +523,7 @@ constexpr __device__ dequantize_1_f32_t get_dequantize_1_f32(ggml_type type_V) { template // D == head size __launch_bounds__(D, 1) static __global__ void flash_attn_stream_k_fixup( - float * __restrict__ dst, const float2 * __restrict__ dst_fixup, const int ne01, const int ne02, const int ne11) { + 
float * __restrict__ dst, const float2 * __restrict__ dst_fixup, const int ne01, const int ne02, const int ne03, const int ne11) { constexpr int ncols = ncols1*ncols2; const int bidx0 = blockIdx.x; @@ -533,8 +537,8 @@ static __global__ void flash_attn_stream_k_fixup( const int iter_k = ne11 / FATTN_KQ_STRIDE; const int iter_j = (ne01 + (ncols1 - 1)) / ncols1; - const int kbc0 = (bidx0 + 0)*iter_k*iter_j*(ne02/ncols2) / gridDim.x; - const int kbc0_stop = (bidx0 + 1)*iter_k*iter_j*(ne02/ncols2) / gridDim.x; + const int kbc0 = (bidx0 + 0)*(iter_k*iter_j*(ne02/ncols2)*ne03) / gridDim.x; + const int kbc0_stop = (bidx0 + 1)*(iter_k*iter_j*(ne02/ncols2)*ne03) / gridDim.x; const bool did_not_have_any_data = kbc0 == kbc0_stop; const bool wrote_beginning_of_tile = kbc0 % iter_k == 0; @@ -543,14 +547,15 @@ static __global__ void flash_attn_stream_k_fixup( return; } - const int channel = kbc0 / (iter_k*iter_j); - const int jt = (kbc0 - channel*iter_k*iter_j) / iter_k; + const int sequence = kbc0 / (iter_k*iter_j*(ne02/ncols2)); + const int head = (kbc0 - iter_k*iter_j*(ne02/ncols2)*sequence) / (iter_k*iter_j); + const int jt = (kbc0 - iter_k*iter_j*(ne02/ncols2)*sequence - iter_k*iter_j*head) / iter_k; // j index of current tile. if (jt*ncols1 + j >= ne01) { return; } - dst += jt*ne02*(ncols1*D) + channel*(ncols2*D) + (j*ne02 + c)*D + tid; + dst += sequence*ne02*ne01*D + jt*ne02*(ncols1*D) + head*(ncols2*D) + (j*ne02 + c)*D + tid; // Load the partial result that needs a fixup: float dst_val = 0.0f; @@ -569,7 +574,7 @@ static __global__ void flash_attn_stream_k_fixup( int bidx = bidx0 - 1; int kbc_stop = kbc0; while(true) { - const int kbc = bidx*iter_k*iter_j*(ne02/ncols2) / gridDim.x; + const int kbc = bidx*(iter_k*iter_j*(ne02/ncols2)*ne03) / gridDim.x; if (kbc == kbc_stop) { // Did not have any data. 
bidx--; kbc_stop = kbc; @@ -615,16 +620,31 @@ static __global__ void flash_attn_combine_results( const float2 * __restrict__ VKQ_meta, float * __restrict__ dst, const int parallel_blocks) { - VKQ_parts += parallel_blocks*D * gridDim.z*blockIdx.x; - VKQ_meta += parallel_blocks * gridDim.z*blockIdx.x; - dst += D * gridDim.z*blockIdx.x; + // Dimension 0: threadIdx.x + // Dimension 1: blockIdx.x + // Dimension 2: blockIdx.y + // Dimension 3: blockIdx.z + // Memory layout is permuted with [0, 2, 1, 3] + + const int ne01 = gridDim.x; + const int ne02 = gridDim.y; + + const int col = blockIdx.x; + const int head = blockIdx.y; + const int sequence = blockIdx.z; + + const int j_dst_unrolled = (sequence*ne01 + col)*ne02 + head; + + VKQ_parts += j_dst_unrolled * parallel_blocks*D; + VKQ_meta += j_dst_unrolled * parallel_blocks; + dst += j_dst_unrolled * D; const int tid = threadIdx.x; __builtin_assume(tid < D); extern __shared__ float2 meta[]; for (int i = tid; i < 2*parallel_blocks; i += D) { - ((float *) meta)[i] = ((const float *)VKQ_meta) [blockIdx.z*(2*parallel_blocks) + i]; + ((float *) meta)[i] = ((const float *)VKQ_meta) [i]; } __syncthreads(); @@ -642,11 +662,11 @@ static __global__ void flash_attn_combine_results( const uint32_t ftz_mask = 0xFFFFFFFF * (diff > SOFTMAX_FTZ_THRESHOLD); *((uint32_t *) &KQ_max_scale) &= ftz_mask; - VKQ_numerator += KQ_max_scale * VKQ_parts[l*gridDim.z*D + blockIdx.z*D + tid]; + VKQ_numerator += KQ_max_scale * VKQ_parts[l*D + tid]; VKQ_denominator += KQ_max_scale * meta[l].y; } - dst[blockIdx.z*D + tid] = VKQ_numerator / VKQ_denominator; + dst[tid] = VKQ_numerator / VKQ_denominator; } [[noreturn]] @@ -703,8 +723,6 @@ void launch_fattn( GGML_ASSERT(K->ne[1] % FATTN_KQ_STRIDE == 0 && "Incorrect KV cache padding."); - GGML_ASSERT(Q->ne[3] == 1); - ggml_cuda_pool & pool = ctx.pool(); cudaStream_t main_stream = ctx.stream(); const int id = ggml_cuda_get_device(); @@ -851,7 +869,8 @@ void launch_fattn( scale, max_bias, m0, m1, n_head_log2, 
logit_softcap, Q->ne[0], Q->ne[1], Q->ne[2], Q->ne[3], K->ne[0], K->ne[1], K->ne[2], K->ne[3], - mask ? mask->ne[1] : 0, mask ? mask->nb[1] : 0, + mask ? mask->ne[1] : 0, mask ? mask->ne[2] : 0, mask ? mask->ne[3] : 0, + mask ? mask->nb[1] : 0, mask ? mask->nb[2] : 0, mask ? mask->nb[3] : 0, Q->nb[1], Q->nb[2], Q->nb[3], nb11, nb12, nb13, nb21, nb22, nb23, @@ -866,11 +885,11 @@ void launch_fattn( flash_attn_stream_k_fixup <<>> - ((float *) KQV->data, dst_tmp_meta.ptr, Q->ne[1], Q->ne[2], K->ne[1]); + ((float *) KQV->data, dst_tmp_meta.ptr, Q->ne[1], Q->ne[2], Q->ne[3], K->ne[1]); } } else if (parallel_blocks > 1) { const dim3 block_dim_combine(DV, 1, 1); - const dim3 blocks_num_combine(Q->ne[1], 1, blocks_num.z); + const dim3 blocks_num_combine(Q->ne[1], Q->ne[2], Q->ne[3]); const size_t nbytes_shared_combine = parallel_blocks*sizeof(float2); flash_attn_combine_results diff --git a/ggml/src/ggml-cuda/fattn-mma-f16.cuh b/ggml/src/ggml-cuda/fattn-mma-f16.cuh index e230f6d494d77..6fa2e77299eb0 100644 --- a/ggml/src/ggml-cuda/fattn-mma-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-mma-f16.cuh @@ -1223,7 +1223,11 @@ static __global__ void flash_attn_ext_f16( const int ne12, const int ne13, const int ne31, + const int ne32, + const int ne33, const int nb31, + const int nb32, + const int nb33, const int nb01, const int nb02, const int nb03, @@ -1272,8 +1276,8 @@ static __global__ void flash_attn_ext_f16( constexpr int kb_niter = FATTN_KQ_STRIDE / c::nbatch_fa; // Number of kernel iterations per assigned KQ slice. // kbc == k block continuous, current index in continuous ijk space. 
- int kbc = (blockIdx.x + 0)*iter_k*iter_j*(ne02/ncols2) / gridDim.x; - const int kbc_stop = (blockIdx.x + 1)*iter_k*iter_j*(ne02/ncols2) / gridDim.x; + int kbc = (blockIdx.x + 0)*(iter_k*iter_j*(ne02/ncols2)*ne03) / gridDim.x; + const int kbc_stop = (blockIdx.x + 1)*(iter_k*iter_j*(ne02/ncols2)*ne03) / gridDim.x; // If the seams of 2 CUDA blocks fall within an output tile their results need to be combined. // For this we need to track both the block that starts the tile (needs_fixup) and the block that finishes the tile (is_fixup). @@ -1283,17 +1287,19 @@ static __global__ void flash_attn_ext_f16( int kb0_start = kbc % iter_k; int kb0_stop = min(iter_k, kb0_start + kbc_stop - kbc); while (kbc < kbc_stop && kb0_stop == iter_k) { - const int channel = kbc / (iter_k*iter_j); - const int jt = (kbc - channel*iter_k*iter_j) / iter_k; // j index of current tile. + const int sequence = kbc / (iter_k*iter_j*(ne02/ncols2)); + const int head = (kbc - iter_k*iter_j*(ne02/ncols2)*sequence) / (iter_k*iter_j); + const int jt = (kbc - iter_k*iter_j*(ne02/ncols2)*sequence - iter_k*iter_j*head) / iter_k; // j index of current tile. - const float2 * Q_f2 = (const float2 *) (Q + nb02* channel*ncols2); - const half2 * K_h2 = (const half2 *) (K + nb12*(channel*ncols2 / gqa_ratio)); - const half2 * mask_h2 = ncols2 > 1 || mask ? (const half2 *) mask + (nb31/sizeof(half2))*jt*ncols1 : nullptr; - float2 * dstk = ((float2 *) dst) + channel*(ncols2 * DV/2); + const float2 * Q_f2 = (const float2 *) (Q + nb03*sequence + nb02*(head*ncols2)); + const half2 * K_h2 = (const half2 *) (K + nb13*sequence + nb12*(head*ncols2 / gqa_ratio)); + const half2 * mask_h2 = ncols2 == 1 && !mask ? nullptr : + (const half2 *) (mask + nb33*(sequence % ne33) + nb31*jt*ncols1); + float2 * dstk = ((float2 *) dst) + (sequence*ne01*ne02 + head*ncols2) * (DV/2); - const half2 * V_h2 = mla ? K_h2 + (DKQ/2 - DV/2) : (const half2 *) (V + nb22*(channel*ncols2 / gqa_ratio)); + const half2 * V_h2 = mla ? 
K_h2 + (DKQ/2 - DV/2) : (const half2 *) (V + nb23*sequence + nb22*(head*ncols2 / gqa_ratio)); - const float slope = ncols2 == 1 ? get_alibi_slope(max_bias, channel, n_head_log2, m0, m1) : 1.0f; + const float slope = ncols2 == 1 ? get_alibi_slope(max_bias, head, n_head_log2, m0, m1) : 1.0f; const int kb0_start_kernel = kb0_start * kb_niter; const int kb0_stop_kernel = kb0_stop * kb_niter; @@ -1322,17 +1328,19 @@ static __global__ void flash_attn_ext_f16( return; } - const int channel = kbc / (iter_k*iter_j); - const int jt = (kbc - channel*iter_k*iter_j) / iter_k; // j index of current tile. + const int sequence = kbc / (iter_k*iter_j*(ne02/ncols2)); + const int head = (kbc - iter_k*iter_j*(ne02/ncols2)*sequence) / (iter_k*iter_j); + const int jt = (kbc - iter_k*iter_j*(ne02/ncols2)*sequence - iter_k*iter_j*head) / iter_k; // j index of current tile. - const float2 * Q_f2 = (const float2 *) (Q + nb02* channel*ncols2); - const half2 * K_h2 = (const half2 *) (K + nb12*(channel*ncols2 / gqa_ratio)); - const half2 * mask_h2 = ncols2 > 1 || mask ? (const half2 *) mask + (nb31/sizeof(half2))*jt*ncols1 : nullptr; - float2 * dstk = ((float2 *) dst) + channel*(ncols2 * DV/2); + const float2 * Q_f2 = (const float2 *) (Q + nb03*sequence + nb02*(head*ncols2)); + const half2 * K_h2 = (const half2 *) (K + nb13*sequence + nb12*(head*ncols2 / gqa_ratio)); + const half2 * mask_h2 = ncols2 == 1 && !mask ? nullptr : + (const half2 *) (mask + nb33*(sequence % ne33) + nb31*jt*ncols1); + float2 * dstk = ((float2 *) dst) + (sequence*ne01*ne02 + head*ncols2) * (DV/2); - const half2 * V_h2 = mla ? K_h2 + (DKQ/2 - DV/2) : (const half2 *) (V + nb22*(channel*ncols2 / gqa_ratio)); + const half2 * V_h2 = mla ? K_h2 + (DKQ/2 - DV/2) : (const half2 *) (V + nb23*sequence + nb22*(head*ncols2 / gqa_ratio)); - const float slope = ncols2 == 1 ? get_alibi_slope(max_bias, channel, n_head_log2, m0, m1) : 1.0f; + const float slope = ncols2 == 1 ? 
get_alibi_slope(max_bias, head, n_head_log2, m0, m1) : 1.0f; const int kb0_start_kernel = kb0_start * kb_niter; const int kb0_stop_kernel = kb0_stop * kb_niter; @@ -1348,8 +1356,8 @@ static __global__ void flash_attn_ext_f16( GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1); GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); GGML_UNUSED(ne10); - GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); - GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); + GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); GGML_UNUSED(ne32); + GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); GGML_UNUSED(ne2); GGML_UNUSED(ne3); diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cu b/ggml/src/ggml-cuda/fattn-tile-f16.cu index 9283560d5c4ee..1f141328845a4 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f16.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f16.cu @@ -6,7 +6,7 @@ template // D == head size #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) -__launch_bounds__(nwarps*WARP_SIZE, 1) +__launch_bounds__(nwarps*WARP_SIZE, 2) #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) static __global__ void flash_attn_tile_ext_f16( const char * __restrict__ Q, @@ -30,7 +30,11 @@ static __global__ void flash_attn_tile_ext_f16( const int ne12, const int ne13, const int ne31, + const int ne32, + const int ne33, const int nb31, + const int nb32, + const int nb33, const int nb01, const int nb02, const int nb03, @@ -60,15 +64,17 @@ static __global__ void flash_attn_tile_ext_f16( const int ic0 = blockIdx.x * ncols; // Index of the Q/QKV column to work on. 
+ const int sequence = blockIdx.z / ne02; + const int head = blockIdx.z - sequence*ne02; const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. - const float2 * Q_f2 = (const float2 *) (Q + nb02* blockIdx.z + nb01*ic0); - const half2 * K_h2 = (const half2 *) (K + nb12*(blockIdx.z / gqa_ratio)); - const half2 * V_h2 = (const half2 *) (V + nb12*(blockIdx.z / gqa_ratio)); // K and V have same shape - const half * maskh = (const half *) mask + ne11*ic0; + const float2 * Q_f2 = (const float2 *) (Q + nb03* sequence + nb02* head + nb01*ic0); + const half2 * K_h2 = (const half2 *) (K + nb13* sequence + nb12*(head / gqa_ratio)); + const half2 * V_h2 = (const half2 *) (V + nb13* sequence + nb12*(head / gqa_ratio)); // K and V have same shape + const half * maskh = (const half *) (mask + nb33*(sequence % ne33) + nb31*ic0); const int stride_KV2 = nb11 / sizeof(half2); - const float slopef = get_alibi_slope(max_bias, blockIdx.z, n_head_log2, m0, m1); + const float slopef = get_alibi_slope(max_bias, head, n_head_log2, m0, m1); const half slopeh = __float2half(slopef); static_assert(D % (2*WARP_SIZE) == 0, "D not divisible by 2*WARP_SIZE == 64."); @@ -253,6 +259,8 @@ static __global__ void flash_attn_tile_ext_f16( __syncthreads(); } + float2 * dst2 = (float2 *) dst; + #pragma unroll for (int j_VKQ_0 = 0; j_VKQ_0 < ncols; j_VKQ_0 += nwarps) { const int j_VKQ = j_VKQ_0 + threadIdx.y; @@ -264,21 +272,21 @@ static __global__ void flash_attn_tile_ext_f16( half kqsum_j = __low2half(kqsum[j_VKQ_0/nwarps]) + __high2half(kqsum[j_VKQ_0/nwarps]); kqsum_j = warp_reduce_sum((float)kqsum_j); + const int j_dst_unrolled = ((sequence*ne01 + ic0 + j_VKQ)*ne02 + head)*gridDim.y + blockIdx.y; + #pragma unroll - for (int i00 = 0; i00 < D; i00 += 2*WARP_SIZE) { - const int i0 = i00 + 2*threadIdx.x; + for (int i00 = 0; i00 < D/2; i00 += WARP_SIZE) { + const int i0 = i00 + threadIdx.x; - half2 dst_val = VKQ[j_VKQ_0/nwarps][i0/(2*WARP_SIZE)]; + half2 
dst_val = VKQ[j_VKQ_0/nwarps][i0/WARP_SIZE]; if (gridDim.y == 1) { dst_val /= __half2half2(kqsum_j); } - const int j_dst = (ic0 + j_VKQ)*gridDim.y + blockIdx.y; - dst[j_dst*D*gridDim.z + D*blockIdx.z + i0 + 0] = __low2float(dst_val); - dst[j_dst*D*gridDim.z + D*blockIdx.z + i0 + 1] = __high2float(dst_val); + dst2[j_dst_unrolled*(D/2) + i0] = __half22float2(dst_val); } if (gridDim.y != 1 && threadIdx.x == 0) { - dst_meta[((ic0 + j_VKQ)*gridDim.z + blockIdx.z) * gridDim.y + blockIdx.y] = make_float2(kqmax[j_VKQ_0/nwarps], kqsum_j); + dst_meta[j_dst_unrolled] = make_float2(kqmax[j_VKQ_0/nwarps], kqsum_j); } } #else @@ -288,8 +296,8 @@ static __global__ void flash_attn_tile_ext_f16( GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); GGML_UNUSED(ne10); GGML_UNUSED(ne11); - GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); - GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); + GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33); + GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); diff --git a/ggml/src/ggml-cuda/fattn-tile-f32.cu b/ggml/src/ggml-cuda/fattn-tile-f32.cu index 32673adb57fc1..a4965583cef1c 100644 --- a/ggml/src/ggml-cuda/fattn-tile-f32.cu +++ b/ggml/src/ggml-cuda/fattn-tile-f32.cu @@ -6,7 +6,7 @@ template // D == head size #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) -__launch_bounds__(nwarps*WARP_SIZE, 1) +__launch_bounds__(nwarps*WARP_SIZE, 2) #endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) static __global__ void flash_attn_tile_ext_f32( const char * __restrict__ Q, @@ -30,7 +30,11 @@ static __global__ void flash_attn_tile_ext_f32( const int ne12, const int ne13, const int ne31, 
+ const int ne32, + const int ne33, const int nb31, + const int nb32, + const int nb33, const int nb01, const int nb02, const int nb03, @@ -58,8 +62,8 @@ static __global__ void flash_attn_tile_ext_f32( GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); GGML_UNUSED(ne10); GGML_UNUSED(ne11); - GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); - GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); + GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); GGML_UNUSED(ne32); + GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); @@ -72,15 +76,17 @@ static __global__ void flash_attn_tile_ext_f32( const int ic0 = blockIdx.x * ncols; // Index of the Q/QKV column to work on. + const int sequence = blockIdx.z / ne02; + const int head = blockIdx.z - sequence*ne02; const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. 
- const float2 * Q_f2 = (const float2 *) (Q + nb02* blockIdx.z + nb01*ic0); - const half2 * K_h2 = (const half2 *) (K + nb12*(blockIdx.z / gqa_ratio)); - const half2 * V_h2 = (const half2 *) (V + nb12*(blockIdx.z / gqa_ratio)); // K and V have same shape - const half * maskh = (const half *) mask + ne11*ic0; + const float2 * Q_f2 = (const float2 *) (Q + nb03* sequence + nb02* head + nb01*ic0); + const half2 * K_h2 = (const half2 *) (K + nb13* sequence + nb12*(head / gqa_ratio)); + const half2 * V_h2 = (const half2 *) (V + nb13* sequence + nb12*(head / gqa_ratio)); // K and V have same shape + const half * maskh = (const half *) (mask + nb33*(sequence % ne33) + nb31*ic0); const int stride_KV2 = nb11 / sizeof(half2); - const float slope = get_alibi_slope(max_bias, blockIdx.z, n_head_log2, m0, m1); + const float slope = get_alibi_slope(max_bias, head, n_head_log2, m0, m1); static_assert(D % (2*WARP_SIZE) == 0, "D not divisible by 2*WARP_SIZE == 64."); @@ -263,6 +269,8 @@ static __global__ void flash_attn_tile_ext_f32( __syncthreads(); } + float2 * dst2 = (float2 *) dst; + #pragma unroll for (int j_VKQ_0 = 0; j_VKQ_0 < ncols; j_VKQ_0 += nwarps) { const int j_VKQ = j_VKQ_0 + threadIdx.y; @@ -274,22 +282,22 @@ static __global__ void flash_attn_tile_ext_f32( float kqsum_j = kqsum[j_VKQ_0/nwarps]; kqsum_j = warp_reduce_sum(kqsum_j); + const int j_dst_unrolled = ((sequence*ne01 + ic0 + j_VKQ)*ne02 + head)*gridDim.y + blockIdx.y; + #pragma unroll - for (int i00 = 0; i00 < D; i00 += 2*WARP_SIZE) { - const int i0 = i00 + 2*threadIdx.x; + for (int i00 = 0; i00 < D/2; i00 += WARP_SIZE) { + const int i0 = i00 + threadIdx.x; - float2 dst_val = VKQ[j_VKQ_0/nwarps][i0/(2*WARP_SIZE)]; + float2 dst_val = VKQ[j_VKQ_0/nwarps][i0/WARP_SIZE]; if (gridDim.y == 1) { dst_val.x /= kqsum_j; dst_val.y /= kqsum_j; } - const int j_dst = (ic0 + j_VKQ)*gridDim.y + blockIdx.y; - dst[j_dst*D*gridDim.z + D*blockIdx.z + i0 + 0] = dst_val.x; - dst[j_dst*D*gridDim.z + D*blockIdx.z + i0 + 1] = dst_val.y; 
+ dst2[j_dst_unrolled*(D/2) + i0] = dst_val; } if (gridDim.y != 1 && threadIdx.x == 0) { - dst_meta[((ic0 + j_VKQ)*gridDim.z + blockIdx.z) * gridDim.y + blockIdx.y] = make_float2(kqmax[j_VKQ_0/nwarps], kqsum_j); + dst_meta[j_dst_unrolled] = make_float2(kqmax[j_VKQ_0/nwarps], kqsum_j); } } #else @@ -297,14 +305,14 @@ static __global__ void flash_attn_tile_ext_f32( GGML_UNUSED(dst); GGML_UNUSED(dst_meta); GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1); GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); - GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); - GGML_UNUSED(ne03); GGML_UNUSED(ne10); GGML_UNUSED(ne11); - GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); - GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); - GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); - GGML_UNUSED(nb13); GGML_UNUSED(nb21); GGML_UNUSED(nb22); - GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); - GGML_UNUSED(ne2); GGML_UNUSED(ne3); + GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); + GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13); + GGML_UNUSED(ne31); GGML_UNUSED(ne32); + GGML_UNUSED(nb31); GGML_UNUSED(nb32); + GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); + GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); + GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); + GGML_UNUSED(ne0); GGML_UNUSED(ne1); GGML_UNUSED(ne2); GGML_UNUSED(ne3); NO_DEVICE_CODE; #endif // FLASH_ATTN_AVAILABLE } diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml/src/ggml-cuda/fattn-vec-f16.cuh index 35e649cb3c81b..b2d469938abf2 100644 --- a/ggml/src/ggml-cuda/fattn-vec-f16.cuh +++ b/ggml/src/ggml-cuda/fattn-vec-f16.cuh @@ -27,7 +27,11 @@ static __global__ void flash_attn_vec_ext_f16( const int ne12, const int ne13, const int ne31, + const int ne32, + const int ne33, const int nb31, + const int nb32, + const int nb33, const int nb01, const int nb02, const int nb03, @@ -63,14 
+67,16 @@ static __global__ void flash_attn_vec_ext_f16( const int ic0 = blockIdx.x * ncols; // Index of the Q/QKV column to work on. + const int sequence = blockIdx.z / ne02; + const int head = blockIdx.z - sequence*ne02; const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. - Q += nb02* blockIdx.z + nb01*ic0; - K += nb12*(blockIdx.z / gqa_ratio); - V += nb22*(blockIdx.z / gqa_ratio); + Q += nb03*sequence + nb02* head + nb01*ic0; + K += nb13*sequence + nb12*(head / gqa_ratio); + V += nb23*sequence + nb22*(head / gqa_ratio); - const half * maskh = (const half *) mask + ne11*ic0; + const half * maskh = (const half *) (mask + nb33*(sequence % ne33) + nb31*ic0); - const float slopef = get_alibi_slope(max_bias, blockIdx.z, n_head_log2, m0, m1); + const float slopef = get_alibi_slope(max_bias, head, n_head_log2, m0, m1); const half slopeh = __float2half(slopef); static_assert(D % (2*WARP_SIZE) == 0, "D not divisible by 2*WARP_SIZE == 64."); @@ -328,12 +334,11 @@ static __global__ void flash_attn_vec_ext_f16( if (gridDim.y == 1) { dst_val /= kqsum[j_VKQ]; } - const int j_dst = (ic0 + j_VKQ)*gridDim.y + blockIdx.y; - dst[j_dst*D*gridDim.z + D*blockIdx.z + tid] = dst_val; + dst[(((sequence*ne01 + ic0 + j_VKQ)*ne02 + head)*gridDim.y + blockIdx.y)*D + tid] = dst_val; } if (gridDim.y != 1 && tid < ncols && (ncols <= 2 || ic0 + tid < ne01)) { - dst_meta[((ic0 + tid)*gridDim.z + blockIdx.z) * gridDim.y + blockIdx.y] = make_float2(kqmax[tid], kqsum[tid]); + dst_meta[((sequence*ne01 + ic0 + tid)*ne02 + head)*gridDim.y + blockIdx.y] = make_float2(kqmax[tid], kqsum[tid]); } #else GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask); @@ -342,8 +347,8 @@ static __global__ void flash_attn_vec_ext_f16( GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); GGML_UNUSED(ne10); GGML_UNUSED(ne11); - GGML_UNUSED(ne12); GGML_UNUSED(ne13); 
GGML_UNUSED(ne31); - GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); + GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne32); + GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); diff --git a/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/ggml/src/ggml-cuda/fattn-vec-f32.cuh index 9539679177969..405b6f5106ea0 100644 --- a/ggml/src/ggml-cuda/fattn-vec-f32.cuh +++ b/ggml/src/ggml-cuda/fattn-vec-f32.cuh @@ -27,7 +27,11 @@ static __global__ void flash_attn_vec_ext_f32( const int ne12, const int ne13, const int ne31, + const int ne32, + const int ne33, const int nb31, + const int nb32, + const int nb33, const int nb01, const int nb02, const int nb03, @@ -51,8 +55,8 @@ static __global__ void flash_attn_vec_ext_f32( GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); GGML_UNUSED(ne10); GGML_UNUSED(ne11); - GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); - GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); + GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33); + GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); @@ -75,13 +79,16 @@ static __global__ void flash_attn_vec_ext_f32( const int ic0 = blockIdx.x * ncols; // Index of the Q/QKV column to work on. + const int sequence = blockIdx.z / ne02; + const int head = blockIdx.z - sequence*ne02; const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. 
- Q += nb02* blockIdx.z + nb01*ic0; - K += nb12*(blockIdx.z / gqa_ratio); - V += nb22*(blockIdx.z / gqa_ratio); // K and V have same shape - const half * maskh = (const half *) mask + ne11*ic0; + Q += nb03*sequence + nb02* head + nb01*ic0; + K += nb13*sequence + nb12*(head / gqa_ratio); + V += nb23*sequence + nb22*(head / gqa_ratio); - const float slope = get_alibi_slope(max_bias, blockIdx.z, n_head_log2, m0, m1); + const half * maskh = (const half *) (mask + nb33*(sequence % ne33) + nb31*ic0); + + const float slope = get_alibi_slope(max_bias, head, n_head_log2, m0, m1); static_assert(D % (2*WARP_SIZE) == 0, "D not divisible by 2*WARP_SIZE == 64."); constexpr int nwarps = D / WARP_SIZE; @@ -323,24 +330,25 @@ static __global__ void flash_attn_vec_ext_f32( if (gridDim.y == 1) { dst_val /= kqsum[j_VKQ]; } - const int j_dst = (ic0 + j_VKQ)*gridDim.y + blockIdx.y; - dst[j_dst*D*gridDim.z + D*blockIdx.z + tid] = dst_val; + dst[(((sequence*ne01 + ic0 + j_VKQ)*ne02 + head)*gridDim.y + blockIdx.y)*D + tid] = dst_val; } if (gridDim.y != 1 && tid < ncols && (ncols <= 2 || ic0 + tid < ne01)) { - dst_meta[((ic0 + tid)*gridDim.z + blockIdx.z) * gridDim.y + blockIdx.y] = make_float2(kqmax[tid], kqsum[tid]); + dst_meta[((sequence*ne01 + ic0 + tid)*ne02 + head)*gridDim.y + blockIdx.y] = make_float2(kqmax[tid], kqsum[tid]); } #else GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask); GGML_UNUSED(dst); GGML_UNUSED(dst_meta); GGML_UNUSED(scale); GGML_UNUSED(max_bias); GGML_UNUSED(m0); GGML_UNUSED(m1); - GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); GGML_UNUSED(ne00); - GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); GGML_UNUSED(ne10); - GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13); GGML_UNUSED(ne31); - GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); - GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); GGML_UNUSED(nb21); - GGML_UNUSED(nb22); GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); - 
GGML_UNUSED(ne2); GGML_UNUSED(ne3); + GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); + GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); + GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13); + GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33); + GGML_UNUSED(nb31); GGML_UNUSED(nb32); GGML_UNUSED(nb33); + GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); + GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); + GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); + GGML_UNUSED(ne0); GGML_UNUSED(ne1); GGML_UNUSED(ne2); GGML_UNUSED(ne3); NO_DEVICE_CODE; #endif // FLASH_ATTN_AVAILABLE } diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cu b/ggml/src/ggml-cuda/fattn-wmma-f16.cu index c5668adb152b2..741b8781d29f5 100644 --- a/ggml/src/ggml-cuda/fattn-wmma-f16.cu +++ b/ggml/src/ggml-cuda/fattn-wmma-f16.cu @@ -9,7 +9,11 @@ #ifdef FP16_MMA_AVAILABLE #if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) #include +#ifdef GGML_USE_MUSA +namespace wmma = mtmusa::wmma; +#else // GGML_USE_MUSA namespace wmma = nvcuda::wmma; +#endif // GGML_USE_MUSA #elif defined(GGML_HIP_ROCWMMA_FATTN) && defined(FP16_MMA_AVAILABLE) #undef HIP_ENABLE_WARP_SYNC_BUILTINS // conflicts with rocWMMA headers #include @@ -42,7 +46,11 @@ static __global__ void flash_attn_ext_f16( const int ne12, const int ne13, const int ne31, + const int ne32, + const int ne33, const int nb31, + const int nb32, + const int nb33, const int nb01, const int nb02, const int nb03, @@ -89,17 +97,19 @@ static __global__ void flash_attn_ext_f16( constexpr int kqs_padded = FATTN_KQ_STRIDE + 8; constexpr int kqar = sizeof(KQ_acc_t)/sizeof(half); + const int sequence = blockIdx.z / ne02; + const int head = blockIdx.z - sequence*ne02; const int gqa_ratio = ne02 / ne12; // With grouped query attention there are > 1 Q matrices per K, V matrix. 
- const float * Q_f = (const float *) (Q + nb02* blockIdx.z + nb01*ic0); - const half * K_h = (const half *) (K + nb12*(blockIdx.z / gqa_ratio)); - const half * V_h = (const half *) (V + nb12*(blockIdx.z / gqa_ratio)); // K and V have same shape - const half * maskh = (const half *) mask + (nb31/sizeof(half))* ic0; - const half2 * mask2 = (const half2 *) mask + (nb31/sizeof(half))*(ic0/2); + const float * Q_f = (const float *) (Q + nb03* sequence + nb02* head + nb01*ic0); + const half * K_h = (const half *) (K + nb13* sequence + nb12*(head / gqa_ratio)); + const half * V_h = (const half *) (V + nb13* sequence + nb12*(head / gqa_ratio)); // K and V have same shape + const half * maskh = (const half *) (mask + nb33*(sequence % ne33) + nb31*ic0); + const half2 * mask2 = (const half2 *) maskh; const int stride_Q = nb01 / sizeof(float); const int stride_KV = nb11 / sizeof(half); - const float slopef = get_alibi_slope(max_bias, blockIdx.z, n_head_log2, m0, m1); + const float slopef = get_alibi_slope(max_bias, head, n_head_log2, m0, m1); const half slopeh = __float2half(slopef); const half2 slope2 = make_half2(slopef, slopef); @@ -394,7 +404,6 @@ static __global__ void flash_attn_ext_f16( if (ic0 + j_VKQ >= ne01) { return; } - const int j_dst = (ic0 + j_VKQ)*gridDim.y + blockIdx.y; float KQ_rowsum_j; if (std::is_same::value) { @@ -403,6 +412,8 @@ static __global__ void flash_attn_ext_f16( KQ_rowsum_j = __low2float(KQ_rowsum_h2[j0/nwarps]) + __high2float(KQ_rowsum_h2[j0/nwarps]); } + const int j_dst_unrolled = ((sequence*ne01 + ic0 + j_VKQ)*ne02 + head)*gridDim.y + blockIdx.y; + #pragma unroll for (int i0 = 0; i0 < D; i0 += warp_size) { const int i = i0 + threadIdx.x; @@ -413,7 +424,7 @@ static __global__ void flash_attn_ext_f16( if (gridDim.y == 1) { dst_val /= KQ_rowsum_j; } - dst[j_dst*gridDim.z*D + blockIdx.z*D + i] = dst_val; + dst[j_dst_unrolled*D + i] = dst_val; } if (gridDim.y == 1 || threadIdx.x != 0) { @@ -427,7 +438,7 @@ static __global__ void 
flash_attn_ext_f16( dst_meta_val.x = __low2float(KQ_max_h2[j0/nwarps]); } dst_meta_val.y = KQ_rowsum_j; - dst_meta[((ic0 + j_VKQ)*gridDim.z + blockIdx.z) * gridDim.y + blockIdx.y] = dst_meta_val; + dst_meta[j_dst_unrolled] = dst_meta_val; } #else GGML_UNUSED(Q); GGML_UNUSED(K); GGML_UNUSED(V); GGML_UNUSED(mask); @@ -436,7 +447,8 @@ static __global__ void flash_attn_ext_f16( GGML_UNUSED(n_head_log2); GGML_UNUSED(logit_softcap); GGML_UNUSED(ne00); GGML_UNUSED(ne01); GGML_UNUSED(ne02); GGML_UNUSED(ne03); GGML_UNUSED(ne10); GGML_UNUSED(ne11); GGML_UNUSED(ne12); GGML_UNUSED(ne13); - GGML_UNUSED(ne31); GGML_UNUSED(nb31); GGML_UNUSED(nb01); GGML_UNUSED(nb02); + GGML_UNUSED(ne31); GGML_UNUSED(ne32); GGML_UNUSED(ne33); GGML_UNUSED(nb31); + GGML_UNUSED(nb32); GGML_UNUSED(nb33); GGML_UNUSED(nb01); GGML_UNUSED(nb02); GGML_UNUSED(nb03); GGML_UNUSED(nb11); GGML_UNUSED(nb12); GGML_UNUSED(nb13); GGML_UNUSED(nb21); GGML_UNUSED(nb22); GGML_UNUSED(nb23); GGML_UNUSED(ne0); GGML_UNUSED(ne1); GGML_UNUSED(ne2); GGML_UNUSED(ne3); diff --git a/ggml/src/ggml-cuda/getrows.cu b/ggml/src/ggml-cuda/getrows.cu index 963e4d03dd77b..f77b2629a19b0 100644 --- a/ggml/src/ggml-cuda/getrows.cu +++ b/ggml/src/ggml-cuda/getrows.cu @@ -168,6 +168,10 @@ static void ggml_cuda_get_rows_switch_src0_type( get_rows_cuda_float((const float *) src0_d, src1_d, dst_d, ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream); break; + case GGML_TYPE_I32: + get_rows_cuda_float((const int32_t *) src0_d, src1_d, dst_d, + ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream); + break; case GGML_TYPE_BF16: get_rows_cuda_float((const nv_bfloat16 *) src0_d, src1_d, dst_d, ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream); @@ -210,6 +214,10 @@ void get_rows_cuda( ggml_cuda_get_rows_switch_src0_type(src0_d, src0_type, src1_d, (float *) dst_d, ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream); break; + case 
GGML_TYPE_I32: + ggml_cuda_get_rows_switch_src0_type(src0_d, src0_type, src1_d, (int32_t *) dst_d, + ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream); + break; case GGML_TYPE_F16: ggml_cuda_get_rows_switch_src0_type(src0_d, src0_type, src1_d, (half *) dst_d, ne00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb1, nb2, nb3, stream); diff --git a/ggml/src/ggml-cuda/ggml-cuda.cu b/ggml/src/ggml-cuda/ggml-cuda.cu index 0bd2904e1c9d1..778d5a48bd9f8 100644 --- a/ggml/src/ggml-cuda/ggml-cuda.cu +++ b/ggml/src/ggml-cuda/ggml-cuda.cu @@ -11,6 +11,8 @@ #include "ggml-cuda/clamp.cuh" #include "ggml-cuda/concat.cuh" #include "ggml-cuda/conv-transpose-1d.cuh" +#include "ggml-cuda/conv2d-dw.cuh" +#include "ggml-cuda/conv2d-transpose.cuh" #include "ggml-cuda/convert.cuh" #include "ggml-cuda/count-equal.cuh" #include "ggml-cuda/cpy.cuh" @@ -35,11 +37,13 @@ #include "ggml-cuda/ssm-scan.cuh" #include "ggml-cuda/sum.cuh" #include "ggml-cuda/sumrows.cuh" +#include "ggml-cuda/mean.cuh" #include "ggml-cuda/tsembd.cuh" #include "ggml-cuda/unary.cuh" #include "ggml-cuda/upscale.cuh" #include "ggml-cuda/wkv.cuh" #include "ggml-cuda/gla.cuh" +#include "ggml-cuda/set-rows.cuh" #include "ggml.h" #include @@ -47,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -54,9 +59,8 @@ #include #include #include -#include -#include #include +#include #include #include #include @@ -97,8 +101,7 @@ int ggml_cuda_get_device() { static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) { ggml_cuda_set_device(device); cudaError_t err; - if (getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr) - { + if (getenv("GGML_CUDA_ENABLE_UNIFIED_MEMORY") != nullptr) { err = cudaMallocManaged(ptr, size); #if defined(GGML_USE_HIP) if (err == hipSuccess) { @@ -116,9 +119,7 @@ static cudaError_t ggml_cuda_device_malloc(void ** ptr, size_t size, int device) err = cudaMalloc(ptr, size); } #endif // defined(GGML_USE_HIP) - } - else - 
{ + } else { err = cudaMalloc(ptr, size); } return err; @@ -514,6 +515,33 @@ std::unique_ptr ggml_backend_cuda_context::new_pool_for_device(i return std::unique_ptr(new ggml_cuda_pool_leg(device)); } +// destroying a cuBLAS handle while a graph is being captured in a different thread can result in a CUDA error +// this lock is used to ensure that no cuBLAS handle is destroyed while a graph is being captured + +static std::mutex ggml_cuda_lock; +static std::condition_variable ggml_cuda_lock_cv; +static std::atomic ggml_cuda_lock_counter; + +ggml_backend_cuda_context::~ggml_backend_cuda_context() { + std::unique_lock lock(ggml_cuda_lock); + ggml_cuda_lock_cv.wait(lock, []{ return ggml_cuda_lock_counter.load(std::memory_order_relaxed) == 0; }); + + if (copy_event != nullptr) { + CUDA_CHECK(cudaEventDestroy(copy_event)); + } + for (int i = 0; i < GGML_CUDA_MAX_DEVICES; ++i) { + for (int j = 0; j < GGML_CUDA_MAX_STREAMS; ++j) { + if (streams[i][j] != nullptr) { + CUDA_CHECK(cudaStreamDestroy(streams[i][j])); + } + } + if (cublas_handles[i] != nullptr) { + CUBLAS_CHECK(cublasDestroy(cublas_handles[i])); + } + } +} + + // cuda buffer struct ggml_backend_cuda_buffer_context { @@ -1200,9 +1228,12 @@ static void ggml_cuda_op_mul_mat_cublas( const int cc = ggml_cuda_info().devices[id].cc; + const bool supports_bf16 = GGML_CUDA_CC_IS_NVIDIA(cc) || GGML_CUDA_CC_IS_AMD(cc) || + (GGML_CUDA_CC_IS_MTHREADS(cc) && cc >= GGML_CUDA_CC_QY2); + const bool use_fp16 = (src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type)) && ggml_is_contiguous(src0) && row_diff == src0->ne[1] && dst->op_params[0] == GGML_PREC_DEFAULT; - if (src0->type == GGML_TYPE_BF16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1]) { + if (supports_bf16 && src0->type == GGML_TYPE_BF16 && ggml_is_contiguous(src0) && row_diff == src0->ne[1]) { ggml_cuda_pool_alloc src1_as_bf16(ctx.pool(id)); if (src1->type != GGML_TYPE_BF16) { const to_bf16_cuda_t to_bf16_cuda = ggml_get_to_bf16_cuda(src1->type); @@ -1230,7 
+1261,7 @@ static void ggml_cuda_op_mul_mat_cublas( const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(GGML_TYPE_BF16); to_fp32_cuda(dst_bf16.get(), dst_dd_i, row_diff*src1_ncols, stream); - } else if (((GGML_CUDA_CC_IS_NVIDIA(cc) && cc >= GGML_CUDA_CC_VOLTA) || GGML_CUDA_CC_IS_AMD(cc)) && use_fp16) { + } else if (fast_fp16_hardware_available(cc) && use_fp16) { // convert src0 and src1 to fp16, multiply as fp16, convert dst to fp32 ggml_cuda_pool_alloc src0_as_f16(ctx.pool(id)); if (src0->type != GGML_TYPE_F16) { @@ -1719,7 +1750,7 @@ static void ggml_cuda_op_mul_mat( } static __global__ void k_compute_batched_ptrs( - const half * src0_as_f16, const half * src1_as_f16, char * dst, + const void * src0_as_f16, const void * src1_as_f16, char * dst, const void ** ptrs_src, void ** ptrs_dst, int64_t ne12, int64_t ne13, int64_t ne23, @@ -1742,83 +1773,131 @@ static __global__ void k_compute_batched_ptrs( ptrs_dst[0*ne23 + i12 + i13*ne12] = ( char *) dst + i12*nbd2 + i13*nbd3; } -static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { +// Type traits for mapping ggml types to CUDA/cuBLAS types +template +struct batched_mul_mat_traits; + +template<> +struct batched_mul_mat_traits { + using cuda_type = float; + static inline const cublasComputeType_t compute_type = CUBLAS_COMPUTE_32F; + static inline const cudaDataType_t data_type = CUDA_R_32F; + static inline const ggml_type ggml_type_val = GGML_TYPE_F32; + static inline const float alpha = 1.0f; + static inline const float beta = 0.0f; + static inline const void* get_alpha() { static const float val = alpha; return &val; } + static inline const void* get_beta() { static const float val = beta; return &val; } + static inline auto get_nc_converter(ggml_type src_type) { return ggml_get_to_fp32_nc_cuda(src_type); } +}; + +template<> +struct batched_mul_mat_traits { + using cuda_type = nv_bfloat16; + static inline const 
cublasComputeType_t compute_type = CUBLAS_COMPUTE_32F; + static inline const cudaDataType_t data_type = CUDA_R_16BF; + static inline const ggml_type ggml_type_val = GGML_TYPE_BF16; + static inline const float alpha = 1.0f; + static inline const float beta = 0.0f; + static inline const void* get_alpha() { static const float val = alpha; return &val; } + static inline const void* get_beta() { static const float val = beta; return &val; } + static inline auto get_nc_converter(ggml_type src_type) { return ggml_get_to_bf16_nc_cuda(src_type); } +}; + +template<> +struct batched_mul_mat_traits { + using cuda_type = half; + static inline const cublasComputeType_t compute_type = CUBLAS_COMPUTE_16F; + static inline const cudaDataType_t data_type = CUDA_R_16F; + static inline const ggml_type ggml_type_val = GGML_TYPE_F16; + static inline const half alpha = 1.0; + static inline const half beta = 0.0; + static inline const void* get_alpha() { static const half val = alpha; return &val; } + static inline const void* get_beta() { static const half val = beta; return &val; } + static inline auto get_nc_converter(ggml_type src_type) { return ggml_get_to_fp16_nc_cuda(src_type); } +}; + +template +static void ggml_cuda_mul_mat_batched_cublas_impl(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + using traits = batched_mul_mat_traits; + using cuda_t = typename traits::cuda_type; + GGML_ASSERT(!ggml_is_transposed(src0)); GGML_ASSERT(!ggml_is_transposed(src1)); - GGML_ASSERT(!ggml_backend_buft_is_cuda_split(src0->buffer->buft)); - GGML_ASSERT(src0->type == GGML_TYPE_F16); + GGML_ASSERT(src0->type == src0_type); + GGML_ASSERT(ggml_is_contiguous(dst)); // Byte offsets and tensor dimensions are currently used in an inconsistent way for dst. // As long as dst is contiguous this does not matter though. 
- GGML_ASSERT(ggml_is_contiguous(dst)); GGML_TENSOR_BINARY_OP_LOCALS const int64_t ne_dst = ggml_nelements(dst); - cudaStream_t main_stream = ctx.stream(); - CUBLAS_CHECK(cublasSetStream(ctx.cublas_handle(), main_stream)); - const half * src0_f16 = (const half *) src0->data; float * dst_ddf = (float *) dst->data; - - const half * src1_f16 = (const half *) src1->data; const size_t ts_src1 = ggml_type_size(src1->type); GGML_ASSERT(nb10 == ts_src1); int64_t s11 = nb11 / ts_src1; int64_t s12 = nb12 / ts_src1; int64_t s13 = nb13 / ts_src1; - ggml_cuda_pool_alloc src1_f16_alloc(ctx.pool()); - // convert src1 to fp16 - if (src1->type != GGML_TYPE_F16) { - const to_fp16_nc_cuda_t to_fp16_cuda = ggml_get_to_fp16_nc_cuda(src1->type); - const int64_t ne_src1 = ggml_nelements(src1); - src1_f16_alloc.alloc(ne_src1); - GGML_ASSERT(to_fp16_cuda != nullptr); + const cuda_t * src0_ptr = nullptr; + const cuda_t * src1_ptr = nullptr; - to_fp16_cuda(src1_f16, src1_f16_alloc.get(), ne10, ne11, ne12, ne13, s11, s12, s13, main_stream); + ggml_cuda_pool_alloc src0_alloc(ctx.pool()); + ggml_cuda_pool_alloc src1_alloc(ctx.pool()); + + // Handle src0 + src0_ptr = (const cuda_t *) src0->data; + + // Handle src1 - convert if necessary + if (src1->type == src0_type) { + src1_ptr = (const cuda_t *) src1->data; + } else { + // Convert src1 to target type using traits conversion functions + const int64_t ne_src1 = ggml_nelements(src1); + src1_alloc.alloc(ne_src1); - src1_f16 = src1_f16_alloc.get(); + const auto convert_func = traits::get_nc_converter(src1->type); + GGML_ASSERT(convert_func != nullptr); + convert_func(src1->data, src1_alloc.get(), ne10, ne11, ne12, ne13, s11, s12, s13, main_stream); + src1_ptr = src1_alloc.get(); s11 = ne10; s12 = ne11*s11; s13 = ne12*s12; } - ggml_cuda_pool_alloc dst_f16(ctx.pool()); + // Setup destination buffer + ggml_cuda_pool_alloc dst_temp(ctx.pool()); char * dst_t; - - cublasComputeType_t cu_compute_type = CUBLAS_COMPUTE_16F; - cudaDataType_t cu_data_type = 
CUDA_R_16F; - - // dst strides size_t nbd2 = dst->nb[2]; size_t nbd3 = dst->nb[3]; - const half alpha_f16 = 1.0f; - const half beta_f16 = 0.0f; - + cublasComputeType_t cu_compute_type = traits::compute_type; + cudaDataType_t cu_data_type = traits::data_type; + cudaDataType_t cu_data_type_a = traits::data_type; + cudaDataType_t cu_data_type_b = traits::data_type; + const void * alpha = traits::get_alpha(); + const void * beta = traits::get_beta(); const float alpha_f32 = 1.0f; - const float beta_f32 = 0.0f; - - const void * alpha = &alpha_f16; - const void * beta = &beta_f16; + const float beta_f32 = 0.0f; if (dst->op_params[0] == GGML_PREC_DEFAULT) { - dst_t = (char *) dst_f16.alloc(ne_dst); - - nbd2 /= sizeof(float) / sizeof(half); - nbd3 /= sizeof(float) / sizeof(half); + if constexpr (src0_type == GGML_TYPE_F32) { + dst_t = (char *) dst_ddf; // Direct F32 output + } else { + dst_t = (char *) dst_temp.alloc(ne_dst); + nbd2 /= sizeof(float) / sizeof(cuda_t); + nbd3 /= sizeof(float) / sizeof(cuda_t); + } } else { dst_t = (char *) dst_ddf; - cu_compute_type = CUBLAS_COMPUTE_32F; - cu_data_type = CUDA_R_32F; - + cu_data_type = CUDA_R_32F; alpha = &alpha_f32; - beta = &beta_f32; + beta = &beta_f32; } int id = ggml_cuda_get_device(); @@ -1826,7 +1905,7 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co if (GGML_CUDA_CC_IS_CDNA(cc) || GGML_CUDA_CC_IS_RDNA4(cc)) { cu_compute_type = CUBLAS_COMPUTE_32F; alpha = &alpha_f32; - beta = &beta_f32; + beta = &beta_f32; } GGML_ASSERT(ne12 % ne02 == 0); @@ -1836,35 +1915,15 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co const int64_t r2 = ne12/ne02; const int64_t r3 = ne13/ne03; -#if 0 - // use cublasGemmEx - { - for (int i13 = 0; i13 < ne13; ++i13) { - for (int i12 = 0; i12 < ne12; ++i12) { - int i03 = i13 / r3; - int i02 = i12 / r2; - - CUBLAS_CHECK( - cublasGemmEx(ctx.cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, - ne01, ne11, ne10, - alpha, (const char *) 
src0_f16 + i03*nb03 + i02*nb02, CUDA_R_16F, nb01/sizeof(half), - src1_f16 + i13*s13 + i12*s12, CUDA_R_16F, s11, - beta, ( char *) dst_t + i13*nbd3 + i12*nbd2, cu_data_type, ne0, - cu_compute_type, - CUBLAS_GEMM_DEFAULT_TENSOR_OP)); - } - } - } -#else if (r2 == 1 && r3 == 1 && ggml_is_contiguous_2(src0) && ggml_is_contiguous_2(src1)) { // there is no broadcast and src0, src1 are contiguous across dims 2, 3 // use cublasGemmStridedBatchedEx CUBLAS_CHECK( cublasGemmStridedBatchedEx(ctx.cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, ne01, ne11, ne10, - alpha, src0_f16, CUDA_R_16F, nb01/nb00, nb02/nb00, // strideA - src1_f16, CUDA_R_16F, s11, s12, // strideB - beta, dst_t, cu_data_type, ne0, ne1*ne0, // strideC + alpha, src0_ptr, cu_data_type_a, nb01/nb00, nb02/nb00, // strideA + src1_ptr, cu_data_type_b, s11, s12, // strideB + beta, dst_t, cu_data_type, ne0, ne1*ne0, // strideC ne12*ne13, cu_compute_type, CUBLAS_GEMM_DEFAULT_TENSOR_OP)); @@ -1875,34 +1934,55 @@ static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, co ggml_cuda_pool_alloc ptrs_src(ctx.pool(), 2*ne23); ggml_cuda_pool_alloc< void *> ptrs_dst(ctx.pool(), 1*ne23); + size_t src1_stride_size = sizeof(cuda_t); + dim3 block_dims(ne13, ne12); k_compute_batched_ptrs<<<1, block_dims, 0, main_stream>>>( - src0_f16, src1_f16, dst_t, + src0_ptr, src1_ptr, dst_t, ptrs_src.get(), ptrs_dst.get(), ne12, ne13, ne23, nb02, nb03, - src1->type == GGML_TYPE_F16 ? nb12 : s12*sizeof(half), - src1->type == GGML_TYPE_F16 ? nb13 : s13*sizeof(half), + (src1->type == src0_type) ? nb12 : s12*src1_stride_size, + (src1->type == src0_type) ? 
nb13 : s13*src1_stride_size, nbd2, nbd3, r2, r3); + CUDA_CHECK(cudaGetLastError()); CUBLAS_CHECK( cublasGemmBatchedEx(ctx.cublas_handle(), CUBLAS_OP_T, CUBLAS_OP_N, ne01, ne11, ne10, - alpha, (const void **) (ptrs_src.get() + 0*ne23), CUDA_R_16F, nb01/nb00, - (const void **) (ptrs_src.get() + 1*ne23), CUDA_R_16F, s11, - beta, ( void **) (ptrs_dst.get() + 0*ne23), cu_data_type, ne0, + alpha, (const void **) (ptrs_src.get() + 0*ne23), cu_data_type_a, nb01/nb00, + (const void **) (ptrs_src.get() + 1*ne23), cu_data_type_b, s11, + beta, ( void **) (ptrs_dst.get() + 0*ne23), cu_data_type, ne0, ne23, cu_compute_type, CUBLAS_GEMM_DEFAULT_TENSOR_OP)); } -#endif - if (dst->op_params[0] == GGML_PREC_DEFAULT && cu_data_type == CUDA_R_16F) { - const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(GGML_TYPE_F16); - to_fp32_cuda(dst_f16.get(), dst_ddf, ne_dst, main_stream); + // Convert output back to F32 if needed + if (dst->op_params[0] == GGML_PREC_DEFAULT && cu_data_type != CUDA_R_32F) { + const to_fp32_cuda_t to_fp32_cuda = ggml_get_to_fp32_cuda(traits::ggml_type_val); + to_fp32_cuda(dst_temp.get(), dst_ddf, ne_dst, main_stream); + } +} + +static void ggml_cuda_mul_mat_batched_cublas(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16 || src0->type == GGML_TYPE_F32); + + switch (src0->type) { + case GGML_TYPE_F32: + ggml_cuda_mul_mat_batched_cublas_impl(ctx, src0, src1, dst); + break; + case GGML_TYPE_BF16: + ggml_cuda_mul_mat_batched_cublas_impl(ctx, src0, src1, dst); + break; + case GGML_TYPE_F16: + ggml_cuda_mul_mat_batched_cublas_impl(ctx, src0, src1, dst); + break; + default: + GGML_ABORT("Unsupported type"); } } @@ -1916,16 +1996,14 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor && ggml_nbytes(src0) != ggml_backend_buffer_get_alloc_size(src0->buffer, src0) && src0->view_src; bool use_mul_mat_vec = 
(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16) - && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 - && src0->ne[0] % 2 == 0 && src1->ne[1] == 1; + && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32; bool use_mul_mat_vec_q = ggml_is_quantized(src0->type) && !bad_padding_clear && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 && src1->ne[1] <= MMVQ_MAX_BATCH_SIZE; bool use_mul_mat_q = ggml_is_quantized(src0->type) && !bad_padding_clear && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32; - bool any_gpus_with_slow_fp16 = false; - bool any_gpus_without_fp16_mma = false; + bool any_gpus_with_slow_fp16 = false; if (split) { ggml_backend_cuda_split_buffer_type_context * buft_ctx = (ggml_backend_cuda_split_buffer_type_context *) src0->buffer->buft->context; @@ -1936,16 +2014,16 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor continue; } - const int cc = ggml_cuda_info().devices[id].cc; - use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]); - any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_hardware_available(cc); - any_gpus_without_fp16_mma = any_gpus_without_fp16_mma || !fp16_mma_hardware_available(cc); + const int cc = ggml_cuda_info().devices[id].cc; + use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]); + use_mul_mat_vec = use_mul_mat_vec && ggml_cuda_should_use_mmv(src0->type, cc, src0->ne, src1->ne[1]); + any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_hardware_available(cc); } } else { - const int cc = ggml_cuda_info().devices[ctx.device].cc; - use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]); - any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_hardware_available(cc); - any_gpus_without_fp16_mma = any_gpus_without_fp16_mma || !fp16_mma_hardware_available(cc); + const int cc = 
ggml_cuda_info().devices[ctx.device].cc; + use_mul_mat_q = use_mul_mat_q && ggml_cuda_should_use_mmq(src0->type, cc, src1->ne[1]); + use_mul_mat_vec = use_mul_mat_vec && ggml_cuda_should_use_mmv(src0->type, cc, src0->ne, src1->ne[1]); + any_gpus_with_slow_fp16 = any_gpus_with_slow_fp16 || !fast_fp16_hardware_available(cc); } // debug helpers @@ -1956,7 +2034,13 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor //printf("src0 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src0), ggml_is_transposed(src0), ggml_type_name(src0->type), src0->name); //printf("src1 is contiguous %d, transposed %d, type = %s, name = %s\n", ggml_is_contiguous(src1), ggml_is_transposed(src1), ggml_type_name(src1->type), src1->name); - if (!split && use_mul_mat_vec && (src0->ne[1] <= MMV_MAX_ROWS || any_gpus_without_fp16_mma)) { + //TODO update for generic tensor parallelism + const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc; + bool use_batched_cublas_f16 = src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16); + bool use_batched_cublas_bf16 = src0->type == GGML_TYPE_BF16 && bf16_mma_hardware_available(cc); + bool use_batched_cublas_f32 = src0->type == GGML_TYPE_F32; + + if (!split && use_mul_mat_vec) { // the custom F16 vector kernel can be used over batched cuBLAS GEMM // but this is only faster for GPUs without tensor cores or with a thin src0 matrix (particularly KQV in attention) ggml_cuda_mul_mat_vec(ctx, src0, src1, nullptr, dst); @@ -1964,8 +2048,8 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor ggml_cuda_mul_mat_vec_q(ctx, src0, src1, nullptr, dst); } else if (!split && use_mul_mat_q) { ggml_cuda_mul_mat_q(ctx, src0, src1, nullptr, dst); - } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) && - !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] 
> 1) { + } else if (!split && (use_batched_cublas_f16 || use_batched_cublas_bf16 || use_batched_cublas_f32) + && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { // general KQ + KQV multi-batch without FlashAttention ggml_cuda_mul_mat_batched_cublas(ctx, src0, src1, dst); } else if (use_mul_mat_vec) { @@ -2147,6 +2231,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg case GGML_OP_GET_ROWS_BACK: ggml_cuda_op_get_rows_back(ctx, dst); break; + case GGML_OP_SET_ROWS: + ggml_cuda_op_set_rows(ctx, dst); + break; case GGML_OP_DUP: ggml_cuda_dup(ctx, dst); break; @@ -2216,6 +2303,30 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg case GGML_UNARY_OP_EXP: ggml_cuda_op_exp(ctx, dst); break; + case GGML_UNARY_OP_ELU: + ggml_cuda_op_elu(ctx, dst); + break; + default: + return false; + } + break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(dst)) { + case GGML_GLU_OP_REGLU: + ggml_cuda_op_reglu(ctx, dst); + break; + case GGML_GLU_OP_GEGLU: + ggml_cuda_op_geglu(ctx, dst); + break; + case GGML_GLU_OP_SWIGLU: + ggml_cuda_op_swiglu(ctx, dst); + break; + case GGML_GLU_OP_GEGLU_ERF: + ggml_cuda_op_geglu_erf(ctx, dst); + break; + case GGML_GLU_OP_GEGLU_QUICK: + ggml_cuda_op_geglu_quick(ctx, dst); + break; default: return false; } @@ -2310,6 +2421,12 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg case GGML_OP_IM2COL: ggml_cuda_op_im2col(ctx, dst); break; + case GGML_OP_CONV_2D_DW: + ggml_cuda_op_conv2d_dw(ctx, dst); + break; + case GGML_OP_CONV_TRANSPOSE_2D: + ggml_cuda_conv_2d_transpose_p0(ctx, dst); + break; case GGML_OP_CONV_TRANSPOSE_1D: ggml_cuda_op_conv_transpose_1d(ctx,dst); break; @@ -2322,6 +2439,9 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg case GGML_OP_SUM_ROWS: ggml_cuda_op_sum_rows(ctx, dst); break; + case GGML_OP_MEAN: + ggml_cuda_op_mean(ctx, dst); + break; case GGML_OP_SSM_CONV: 
ggml_cuda_op_ssm_conv(ctx, dst); break; @@ -2664,7 +2784,9 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx ggml_backend_buft_is_cuda_split(node->src[j]->buffer->buft) || (integrated && ggml_backend_buft_is_cuda_host(node->src[j]->buffer->buft))); } } -#endif +#else + GGML_UNUSED(integrated); +#endif // NDEBUG bool ok = ggml_cuda_compute_forward(*cuda_ctx, node); if (!ok) { @@ -2683,6 +2805,11 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx CUDA_CHECK(cudaStreamEndCapture(cuda_ctx->stream(), &cuda_ctx->cuda_graph->graph)); graph_evaluated_or_captured = true; // CUDA graph has been captured + + std::lock_guard lock(ggml_cuda_lock); + if (ggml_cuda_lock_counter.fetch_sub(1, std::memory_order_relaxed) == 1) { + ggml_cuda_lock_cv.notify_all(); + } } else { graph_evaluated_or_captured = true; // ggml graph has been directly evaluated } @@ -2758,7 +2885,13 @@ static enum ggml_status ggml_backend_cuda_graph_compute(ggml_backend_t backend, } } - if (use_cuda_graph && cuda_graph_update_required) { // Start CUDA graph capture + if (use_cuda_graph && cuda_graph_update_required) { + // Start CUDA graph capture + { + std::lock_guard lock(ggml_cuda_lock); + ggml_cuda_lock_counter.fetch_add(1, std::memory_order_relaxed); + } + CUDA_CHECK(cudaStreamBeginCapture(cuda_ctx->stream(), cudaStreamCaptureModeRelaxed)); } @@ -2986,11 +3119,24 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_TANH: case GGML_UNARY_OP_EXP: + case GGML_UNARY_OP_ELU: return ggml_is_contiguous(op->src[0]); default: return false; } break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(op)) { + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + return ggml_is_contiguous_1(op->src[0]); + default: + return false; + } + break; case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT_ID: { 
@@ -3014,9 +3160,16 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g return false; } #ifdef GGML_USE_MUSA - if (b->type == GGML_TYPE_F16 && b->ne[2]*b->ne[3] > 1 && - !ggml_is_transposed(a) && !ggml_is_transposed(b)) { - return false; + const int cc = ggml_cuda_info().devices[dev_ctx->device].cc; + if (b->ne[2]*b->ne[3] > 1 && !ggml_is_transposed(a) && !ggml_is_transposed(b)) { + if (GGML_CUDA_CC_IS_QY1(cc) && op->op == GGML_OP_MUL_MAT && + a->type == GGML_TYPE_F16 && b->type == GGML_TYPE_F16) { + return false; + } + if (GGML_CUDA_CC_IS_QY2(cc) && op->op == GGML_OP_MUL_MAT_ID && + a->type == GGML_TYPE_Q2_K && b->type == GGML_TYPE_F32) { + return false; + } } #endif // GGML_USE_MUSA switch (a->type) { @@ -3043,11 +3196,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_TYPE_IQ4_NL: case GGML_TYPE_IQ4_XS: case GGML_TYPE_BF16: -#ifdef GGML_USE_MUSA - if (a->type == GGML_TYPE_Q3_K) { - return false; - } -#endif // GGML_USE_MUSA return true; default: return false; @@ -3060,6 +3208,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g switch (op->src[0]->type) { case GGML_TYPE_F16: case GGML_TYPE_F32: + case GGML_TYPE_BF16: + case GGML_TYPE_I32: case GGML_TYPE_Q4_0: case GGML_TYPE_Q4_1: case GGML_TYPE_Q5_0: @@ -3074,6 +3224,13 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g { return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32 && op->ne[2] == 1 && op->ne[3] == 1; } break; + case GGML_OP_SET_ROWS: + { +#pragma message("TODO: implement Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)") + return (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_BF16) && + op->src[0]->type == GGML_TYPE_F32 && + op->src[1]->type == GGML_TYPE_I64; + } break; case GGML_OP_CPY: { ggml_type src0_type = op->src[0]->type; @@ -3189,12 +3346,26 @@ static bool 
ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_COS: case GGML_OP_CLAMP: case GGML_OP_LOG: - case GGML_OP_SSM_SCAN: - case GGML_OP_SSM_CONV: return true; + case GGML_OP_SSM_SCAN: { + if (op->src[3]->ne[0] == 1) { + // Mamba2 + // (kernel only supports (d_state == 128 || d_state == 256) && d_head % 16 == 0) + return (op->src[0]->ne[0] == 128 || op->src[0]->ne[0] == 256) && op->src[0]->ne[1] % 16 == 0; + } else { + // Mamba + // (kernel only supports d_state == 16, d_head == 1, n_head % 128 == 0, n_group == 1) + return op->src[0]->ne[0] == 16 && op->src[0]->ne[1] == 1 && op->src[0]->ne[2] % 128 == 0 && op->src[4]->ne[1] == 1; + } + } + case GGML_OP_SSM_CONV: { + // assumes d_inner % threads == 0 + return op->src[0]->ne[1] % 128 == 0; + } case GGML_OP_CONT: return op->src[0]->type != GGML_TYPE_BF16; case GGML_OP_DIAG_MASK_INF: + return true; case GGML_OP_SOFT_MAX: return true; case GGML_OP_SOFT_MAX_BACK: { @@ -3207,16 +3378,18 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g return op->src[0]->nb[0] == ggml_type_size(op->src[0]->type) && ggml_is_contiguous_2(op->src[0]); } case GGML_OP_IM2COL: + case GGML_OP_CONV_2D_DW: + case GGML_OP_CONV_TRANSPOSE_2D: case GGML_OP_POOL_2D: case GGML_OP_SUM: case GGML_OP_SUM_ROWS: + case GGML_OP_MEAN: case GGML_OP_ARGSORT: case GGML_OP_ACC: return true; case GGML_OP_GROUP_NORM: return ggml_is_contiguous(op->src[0]); case GGML_OP_UPSCALE: - return op->src[0]->type == GGML_TYPE_F32 && op->op_params[0] == GGML_SCALE_MODE_NEAREST; case GGML_OP_PAD: case GGML_OP_ARANGE: case GGML_OP_TIMESTEP_EMBEDDING: @@ -3240,9 +3413,6 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g if (op->src[0]->ne[0] == 192) { return false; } - if (op->src[0]->ne[3] != 1) { - return false; - } if (op->src[1]->type == GGML_TYPE_BF16 || op->src[2]->type == GGML_TYPE_BF16) { return false; } @@ -3255,6 +3425,9 @@ static bool 
ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g if (op->src[0]->ne[0] == 256 && op->src[1]->type == GGML_TYPE_F16 && op->src[2]->type == GGML_TYPE_F16) { return true; } + if (op->src[3] && op->src[3]->ne[2] != 1) { + return false; + } return fp16_mma_available(ggml_cuda_info().devices[dev_ctx->device].cc) && op->src[1]->type == GGML_TYPE_F16 && op->src[2]->type == GGML_TYPE_F16; } diff --git a/ggml/src/ggml-cuda/mean.cu b/ggml/src/ggml-cuda/mean.cu new file mode 100644 index 0000000000000..4b238a3998ba3 --- /dev/null +++ b/ggml/src/ggml-cuda/mean.cu @@ -0,0 +1,19 @@ +#include "mean.cuh" + +void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const float * src0_d = (const float *) src0->data; + float * dst_d = (float *) dst->data; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(dst->type == GGML_TYPE_F32); + GGML_ASSERT(ggml_is_contiguous(src0)); + + const int64_t ncols = src0->ne[0]; + const int64_t nrows = ggml_nrows(src0); + + const dim3 block_dims(WARP_SIZE, 1, 1); + const dim3 block_nums(nrows, 1, 1); + reduce_rows_f32<<>>(src0_d, dst_d, ncols); +} diff --git a/ggml/src/ggml-cuda/mean.cuh b/ggml/src/ggml-cuda/mean.cuh new file mode 100644 index 0000000000000..2b9b10433438e --- /dev/null +++ b/ggml/src/ggml-cuda/mean.cuh @@ -0,0 +1,3 @@ +#include "common.cuh" + +void ggml_cuda_op_mean(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml/src/ggml-cuda/mmq.cuh index 80baf459c15f2..9696a32046212 100644 --- a/ggml/src/ggml-cuda/mmq.cuh +++ b/ggml/src/ggml-cuda/mmq.cuh @@ -3016,14 +3016,8 @@ static void launch_mul_mat_q(ggml_backend_cuda_context & ctx, const mmq_args & a const int nbytes_shared = mmq_get_nbytes_shared(mmq_x, mmq_y, cc); -#if !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) - static bool 
shared_memory_limit_raised[GGML_CUDA_MAX_DEVICES] = {false}; - if (!shared_memory_limit_raised[id]) { - CUDA_CHECK(cudaFuncSetAttribute(mul_mat_q, cudaFuncAttributeMaxDynamicSharedMemorySize, nbytes_shared)); - CUDA_CHECK(cudaFuncSetAttribute(mul_mat_q, cudaFuncAttributeMaxDynamicSharedMemorySize, nbytes_shared)); - shared_memory_limit_raised[id] = true; - } -#endif // !(defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)) && !defined(GGML_USE_MUSA) + CUDA_SET_SHARED_MEMORY_LIMIT((mul_mat_q), nbytes_shared); + CUDA_SET_SHARED_MEMORY_LIMIT((mul_mat_q), nbytes_shared); const int nty = (args.nrows_x + mmq_y - 1) / mmq_y; const int ntx = (args.ncols_dst + mmq_x - 1) / mmq_x; diff --git a/ggml/src/ggml-cuda/mmv.cu b/ggml/src/ggml-cuda/mmv.cu index d8c385e2399ae..e14c93516bddf 100644 --- a/ggml/src/ggml-cuda/mmv.cu +++ b/ggml/src/ggml-cuda/mmv.cu @@ -2,25 +2,26 @@ #include "common.cuh" #include "mmv.cuh" -template +template static __global__ void mul_mat_vec( const T * __restrict__ x, const float * __restrict__ y, const int32_t * __restrict__ ids, float * __restrict__ dst, - const int64_t ncols2, const int64_t nchannels_y, const int64_t stride_row, - const int64_t channel_ratio, const int64_t stride_channel_x, const int64_t stride_channel_y, const int64_t stride_channel_dst, - const int64_t sample_ratio, const int64_t stride_sample_x, const int64_t stride_sample_y, const int64_t stride_sample_dst) { - const int64_t row = blockIdx.x; - const int64_t channel_dst = blockIdx.y; - const int64_t channel_x = ids ? ids[channel_dst] : channel_dst / channel_ratio; - const int64_t channel_y = ids ? 
channel_dst % nchannels_y : channel_dst; - const int64_t sample_dst = blockIdx.z; - const int64_t sample_x = sample_dst / sample_ratio; - const int64_t sample_y = sample_dst; - const int tid = threadIdx.x; + const int ncols2, const int nchannels_y, const int stride_row, const int stride_col_y2, const int stride_col_dst, + const int channel_ratio, const int stride_channel_x, const int stride_channel_y, const int stride_channel_dst, + const int sample_ratio, const int stride_sample_x, const int stride_sample_y, const int stride_sample_dst) { + const int row = blockIdx.x; + const int channel_dst = blockIdx.y; + const int channel_x = ids ? ids[channel_dst] : channel_dst / channel_ratio; + const int channel_y = ids ? channel_dst % nchannels_y : channel_dst; + const int sample_dst = blockIdx.z; + const int sample_x = sample_dst / sample_ratio; + const int sample_y = sample_dst; + const int tid = threadIdx.x; + constexpr int warp_size = ggml_cuda_get_physical_warp_size(); - x += sample_x *stride_sample_x + channel_x *stride_channel_x + row*stride_row; - y += sample_y *stride_sample_y + channel_y *stride_channel_y; - dst += sample_dst*stride_sample_dst + channel_dst*stride_channel_dst; + x += int64_t(sample_x) *stride_sample_x + channel_x *stride_channel_x + row*stride_row; + y += int64_t(sample_y) *stride_sample_y + channel_y *stride_channel_y; + dst += int64_t(sample_dst)*stride_sample_dst + channel_dst*stride_channel_dst; const float2 * y2 = (const float2 *) y; @@ -34,81 +35,108 @@ static __global__ void mul_mat_vec( __syncthreads(); } - float sumf = 0.0f; + float sumf[ncols_dst] = {0.0f}; if constexpr (std::is_same::value) { const float2 * x2 = (const float2 *) x; - for (int64_t col2 = tid; col2 < ncols2; col2 += block_size) { + for (int col2 = tid; col2 < ncols2; col2 += block_size) { const float2 tmpx = x2[col2]; - const float2 tmpy = y2[col2]; - sumf += tmpx.x*tmpy.x; - sumf += tmpx.y*tmpy.y; + +#pragma unroll + for (int j = 0; j < ncols_dst; ++j) { + const float2 
tmpy = y2[j*stride_col_y2 + col2]; + sumf[j] += tmpx.x*tmpy.x; + sumf[j] += tmpx.y*tmpy.y; + } } } else if constexpr (std::is_same::value) { const half2 * x2 = (const half2 *) x; if (std::is_same::value) { - for (int64_t col2 = tid; col2 < ncols2; col2 += block_size) { + for (int col2 = tid; col2 < ncols2; col2 += block_size) { const float2 tmpx = __half22float2(x2[col2]); - const float2 tmpy = y2[col2]; - sumf += tmpx.x * tmpy.x; - sumf += tmpx.y * tmpy.y; + +#pragma unroll + for (int j = 0; j < ncols_dst; ++j) { + const float2 tmpy = y2[j*stride_col_y2 + col2]; + sumf[j] += tmpx.x * tmpy.x; + sumf[j] += tmpx.y * tmpy.y; + } } } else { #ifdef FP16_AVAILABLE - half2 sumh2 = make_half2(0.0f, 0.0f); + half2 sumh2[ncols_dst] = {{0.0f, 0.0f}}; + + for (int col2 = tid; col2 < ncols2; col2 += block_size) { + const half2 tmpx = x2[col2]; - for (int64_t col2 = tid; col2 < ncols2; col2 += block_size) { - const float2 tmp = y2[col2]; - sumh2 += x2[col2] * make_half2(tmp.x, tmp.y); +#pragma unroll + for (int j = 0; j < ncols_dst; ++j) { + const float2 tmpy = y2[j*stride_col_y2 + col2]; + sumh2[j] += tmpx * make_half2(tmpy.x, tmpy.y); + } } - sumf = __low2float(sumh2) + __high2float(sumh2); +#pragma unroll + for (int j = 0; j < ncols_dst; ++j) { + sumf[j] = __low2float(sumh2[j]) + __high2float(sumh2[j]); + } #else NO_DEVICE_CODE; #endif // FP16_AVAILABLE } } else if constexpr (std::is_same::value) { const int * x2 = (const int *) x; - for (int64_t col2 = tid; col2 < ncols2; col2 += block_size) { - const int tmpx = x2[col2]; - const float2 tmpy = y2[col2]; - sumf += float(reinterpret_cast(&tmpx)[0]) * tmpy.x; - sumf += float(reinterpret_cast(&tmpx)[1]) * tmpy.y; + for (int col2 = tid; col2 < ncols2; col2 += block_size) { + const int tmpx = x2[col2]; +#pragma unroll + for (int j = 0; j < ncols_dst; ++j) { + const float2 tmpy = y2[j*stride_col_y2 + col2]; + sumf[j] += float(reinterpret_cast(&tmpx)[0]) * tmpy.x; + sumf[j] += float(reinterpret_cast(&tmpx)[1]) * tmpy.y; + } } } else 
{ static_assert(std::is_same::value, "unsupported type"); } - sumf = warp_reduce_sum(sumf); +#pragma unroll + for (int j = 0; j < ncols_dst; ++j) { + sumf[j] = warp_reduce_sum(sumf[j]); - if (block_size > warp_size) { - buf_iw[tid/warp_size] = sumf; - __syncthreads(); - if (tid >= warp_size) { - return; + if (block_size > warp_size) { + buf_iw[tid/warp_size] = sumf[j]; + __syncthreads(); + if (tid < warp_size) { + sumf[j] = buf_iw[tid]; + sumf[j] = warp_reduce_sum(sumf[j]); + } + if (j < ncols_dst) { + __syncthreads(); + } } - sumf = buf_iw[tid]; - sumf = warp_reduce_sum(sumf); } - if (tid != 0) { + if (tid >= ncols_dst) { return; } - dst[row] = sumf; + dst[tid*stride_col_dst + row] = sumf[tid]; } -template +template static void launch_mul_mat_vec_cuda( const T * x, const float * y, const int32_t * ids, float * dst, - const int64_t ncols, const int64_t nrows, const int64_t stride_row, const int64_t nchannels_x, const int64_t nchannels_y, const int64_t nchannels_dst, + const int64_t ncols, const int64_t nrows, + const int64_t stride_row, const int64_t stride_col_y, const int64_t stride_col_dst, + const int64_t nchannels_x, const int64_t nchannels_y, const int64_t nchannels_dst, const int64_t stride_channel_x, const int64_t stride_channel_y, const int64_t stride_channel_dst, const int64_t nsamples_x, const int64_t nsamples_dst, const int64_t stride_sample_x, const int64_t stride_sample_y, const int64_t stride_sample_dst, cudaStream_t stream) { - GGML_ASSERT(ncols % 2 == 0); - GGML_ASSERT(stride_row % 2 == 0); + GGML_ASSERT(ncols % 2 == 0); + GGML_ASSERT(stride_row % 2 == 0); + GGML_ASSERT(stride_col_y % 2 == 0); GGML_ASSERT(ids || nchannels_dst % nchannels_x == 0); GGML_ASSERT( nsamples_dst % nsamples_x == 0); const int64_t channel_ratio = nchannels_dst / nchannels_x; @@ -138,44 +166,52 @@ static void launch_mul_mat_vec_cuda( const dim3 block_dims(block_size_best, 1, 1); switch (block_size_best) { case 32: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, 
stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; case 64: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; case 96: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; case 128: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; case 160: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, 
sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; case 192: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; case 224: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; case 256: { - mul_mat_vec<<>> - (x, y, ids, dst, ncols/2, nchannels_y, stride_row, channel_ratio, stride_channel_x, stride_channel_y, - stride_channel_dst, sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); + mul_mat_vec<<>> + (x, y, ids, dst, ncols/2, nchannels_y, stride_row, stride_col_y/2, stride_col_dst, + channel_ratio, stride_channel_x, stride_channel_y, stride_channel_dst, + sample_ratio, stride_sample_x, stride_sample_y, stride_sample_dst); } break; default: { GGML_ABORT("fatal error"); @@ -183,23 +219,91 @@ static void launch_mul_mat_vec_cuda( } } +template +static void mul_mat_vec_cuda_switch_ncols_dst( + const T * x, const float * y, const int32_t * ids, float * dst, + const 
int64_t ncols, const int64_t nrows, const int64_t ncols_dst, + const int64_t stride_row, const int64_t stride_col_y, const int64_t stride_col_dst, + const int64_t nchannels_x, const int64_t nchannels_y, const int64_t nchannels_dst, + const int64_t stride_channel_x, const int64_t stride_channel_y, const int64_t stride_channel_dst, const int64_t nsamples_x, + const int64_t nsamples_dst, const int64_t stride_sample_x, const int64_t stride_sample_y, const int64_t stride_sample_dst, + cudaStream_t stream) { + switch (ncols_dst) { + case 1: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + case 2: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + case 3: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + case 4: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + case 5: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, 
nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + case 6: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + case 7: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + case 8: + launch_mul_mat_vec_cuda + (x, y, ids, dst, ncols, nrows, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); + break; + default: + GGML_ABORT("fatal error"); + break; + } +} + template static void mul_mat_vec_cuda( const T * x, const float * y, const int32_t * ids, float * dst, - const int64_t ncols, const int64_t nrows, const int64_t stride_row, const int64_t nchannels_x, const int64_t nchannels_y, const int64_t nchannels_dst, + const int64_t ncols, const int64_t nrows, const int64_t ncols_dst, + const int64_t stride_row, const int64_t stride_col_y, const int stride_col_dst, + const int64_t nchannels_x, const int64_t nchannels_y, const int64_t nchannels_dst, const int64_t stride_channel_x, const int64_t stride_channel_y, const int64_t stride_channel_dst, const int64_t nsamples_x, const int64_t nsamples_dst, const int64_t stride_sample_x, const int64_t stride_sample_y, const int64_t stride_sample_dst, enum ggml_prec prec, cudaStream_t stream) { if constexpr(std::is_same::value) { if (prec == GGML_PREC_DEFAULT) { - launch_mul_mat_vec_cuda - (x, y, ids, dst, ncols, nrows, stride_row, 
nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + mul_mat_vec_cuda_switch_ncols_dst + (x, y, ids, dst, ncols, nrows, ncols_dst, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); return; } } - launch_mul_mat_vec_cuda - (x, y, ids, dst, ncols, nrows, stride_row, nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, + mul_mat_vec_cuda_switch_ncols_dst + (x, y, ids, dst, ncols, nrows, ncols_dst, stride_row, stride_col_y, stride_col_dst, + nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, stream); } @@ -246,24 +350,24 @@ void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * const int64_t stride_channel_dst = ids ? s1 : s2; const int64_t stride_channel_y = ids ? 
s11 : s12; - GGML_ASSERT(ncols_dst == 1); + GGML_ASSERT(!ids || ncols_dst == 1); switch (src0->type) { case GGML_TYPE_F32: { const float * src0_d = (const float *) src0->data; - mul_mat_vec_cuda(src0_d, src1_d, ids_d, dst_d, ne00, ne01, s01, + mul_mat_vec_cuda(src0_d, src1_d, ids_d, dst_d, ne00, ne01, ncols_dst, s01, s11, s1, ne02, nchannels_y, nchannels_dst, s02, stride_channel_y, stride_channel_dst, ne03, ne3, s03, s13, s3, prec, ctx.stream()); } break; case GGML_TYPE_F16: { const half * src0_d = (const half *) src0->data; - mul_mat_vec_cuda(src0_d, src1_d, ids_d, dst_d, ne00, ne01, s01, + mul_mat_vec_cuda(src0_d, src1_d, ids_d, dst_d, ne00, ne01, ncols_dst, s01, s11, s1, ne02, nchannels_y, nchannels_dst, s02, stride_channel_y, stride_channel_dst, ne03, ne3, s03, s13, s3, prec, ctx.stream()); } break; case GGML_TYPE_BF16: { const nv_bfloat16 * src0_d = (const nv_bfloat16 *) src0->data; - mul_mat_vec_cuda(src0_d, src1_d, ids_d, dst_d, ne00, ne01, s01, + mul_mat_vec_cuda(src0_d, src1_d, ids_d, dst_d, ne00, ne01, ncols_dst, s01, s11, s1, ne02, nchannels_y, nchannels_dst, s02, stride_channel_y, stride_channel_dst, ne03, ne3, s03, s13, s3, prec, ctx.stream()); } break; @@ -282,16 +386,19 @@ void ggml_cuda_op_mul_mat_vec( GGML_ASSERT(dst->type == GGML_TYPE_F32); const int64_t ne00 = src0->ne[0]; + const int64_t ne10 = src1->ne[0]; + const int64_t ne0 = dst->ne[0]; const int64_t row_diff = row_high - row_low; - GGML_ASSERT(src1_ncols == 1); - - const int cc = ggml_cuda_info().devices[ggml_cuda_get_device()].cc; + const int id = ggml_cuda_get_device(); + const int cc = ggml_cuda_info().devices[id].cc; const enum ggml_prec prec = fast_fp16_available(cc) ? ggml_prec(dst->op_params[0]) : GGML_PREC_F32; // ggml_cuda_op provides single, contiguous matrices const int64_t stride_row = ne00; + const int64_t stride_col_y = ne10; + const int64_t stride_col_dst = id == ctx.device ? 
ne0 : row_diff; // main device has larger memory buffer const int64_t nchannels_x = 1; const int64_t nchannels_y = 1; const int64_t nchannels_dst = 1; @@ -307,19 +414,19 @@ void ggml_cuda_op_mul_mat_vec( switch (src0->type) { case GGML_TYPE_F32: { const float * src0_d = (const float *) src0_dd_i; - mul_mat_vec_cuda(src0_d, src1_ddf_i, nullptr, dst_dd_i, ne00, row_diff, stride_row, + mul_mat_vec_cuda(src0_d, src1_ddf_i, nullptr, dst_dd_i, ne00, row_diff, src1_ncols, stride_row, stride_col_y, stride_col_dst, nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, prec, stream); } break; case GGML_TYPE_F16: { const half * src0_d = (const half *) src0_dd_i; - mul_mat_vec_cuda(src0_d, src1_ddf_i, nullptr, dst_dd_i, ne00, row_diff, stride_row, + mul_mat_vec_cuda(src0_d, src1_ddf_i, nullptr, dst_dd_i, ne00, row_diff, src1_ncols, stride_row, stride_col_y, stride_col_dst, nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, prec, stream); } break; case GGML_TYPE_BF16: { const nv_bfloat16 * src0_d = (const nv_bfloat16 *) src0_dd_i; - mul_mat_vec_cuda(src0_d, src1_ddf_i, nullptr, dst_dd_i, ne00, row_diff, stride_row, + mul_mat_vec_cuda(src0_d, src1_ddf_i, nullptr, dst_dd_i, ne00, row_diff, src1_ncols, stride_row, stride_col_y, stride_col_dst, nchannels_x, nchannels_y, nchannels_dst, stride_channel_x, stride_channel_y, stride_channel_dst, nsamples_x, nsamples_dst, stride_sample_x, stride_sample_y, stride_sample_dst, prec, stream); } break; @@ -334,3 +441,66 @@ void ggml_cuda_op_mul_mat_vec( GGML_UNUSED(src1_ncols); GGML_UNUSED(src1_padded_row_size); } + +bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_ne, int64_t ne11) { + if (src0_ne[0] % 2 != 0) { + return false; + } + switch (type) { + case 
GGML_TYPE_F32: + if (GGML_CUDA_CC_IS_NVIDIA(cc)) { + if (cc >= GGML_CUDA_CC_ADA_LOVELACE) { + return ne11 <= 8; + } + if (cc >= GGML_CUDA_CC_TURING) { + return ne11 <= 4; + } + return ne11 <= 3; + } else if (GGML_CUDA_CC_IS_AMD(cc)) { + if (fp32_mma_hardware_available(cc)) { + return ne11 <= 3; + } + return ne11 <= 8; + } + return ne11 <= 8; + case GGML_TYPE_F16: + if (GGML_CUDA_CC_IS_NVIDIA(cc)) { + const bool src0_small = (src0_ne[1] <= 512 || src0_ne[2]*src0_ne[3] == 1); + if (cc >= GGML_CUDA_CC_ADA_LOVELACE) { + return src0_small && ne11 <= 4; + } + if (fp16_mma_hardware_available(cc)) { + return src0_small && ne11 <= 3; + } + return ne11 <= 8; + } else if (GGML_CUDA_CC_IS_AMD(cc)) { + if (fp16_mma_hardware_available(cc)) { + if (GGML_CUDA_CC_IS_RDNA3(cc) || GGML_CUDA_CC_IS_RDNA4(cc)) { + return ne11 <= 5; + } + return ne11 <= 2; + } + return ne11 <= 8; + } + return ne11 <= 8; + case GGML_TYPE_BF16: + if (GGML_CUDA_CC_IS_NVIDIA(cc)) { + const bool src0_small = (src0_ne[1] <= 512 || src0_ne[2]*src0_ne[3] == 1); + if (cc >= GGML_CUDA_CC_ADA_LOVELACE) { + return src0_small && ne11 <= 4; + } + if (bf16_mma_hardware_available(cc)) { + return src0_small && ne11 <= 3; + } + return ne11 <= 8; + } else if (GGML_CUDA_CC_IS_AMD(cc)) { + if (bf16_mma_hardware_available(cc)) { + return ne11 <= 3; + } + return ne11 <= 8; + } + return ne11 <= 8; + default: + return false; + } +} diff --git a/ggml/src/ggml-cuda/mmv.cuh b/ggml/src/ggml-cuda/mmv.cuh index 756e7e1cc7fc3..1330bcb6a8860 100644 --- a/ggml/src/ggml-cuda/mmv.cuh +++ b/ggml/src/ggml-cuda/mmv.cuh @@ -1,8 +1,5 @@ #include "common.cuh" -// maximum number of src0 rows with which to use mul_mat_vec over cuBLAS if FP16 tensor cores are available -#define MMV_MAX_ROWS 512 - void ggml_cuda_mul_mat_vec(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * ids, ggml_tensor * dst); void ggml_cuda_op_mul_mat_vec( @@ -10,3 +7,5 @@ void ggml_cuda_op_mul_mat_vec( const ggml_tensor * 
src0, const ggml_tensor * src1, ggml_tensor * dst, const char * src0_dd_i, const float * src1_ddf_i, const char * src1_ddq_i, float * dst_dd_i, const int64_t row_low, const int64_t row_high, const int64_t src1_ncols, const int64_t src1_padded_row_size, cudaStream_t stream); + +bool ggml_cuda_should_use_mmv(enum ggml_type type, int cc, const int64_t * src0_ne, int64_t ne11); diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml/src/ggml-cuda/rope.cu index 18f691b2d3103..d058504cd6cc0 100644 --- a/ggml/src/ggml-cuda/rope.cu +++ b/ggml/src/ggml-cuda/rope.cu @@ -50,21 +50,19 @@ static __global__ void rope_norm( const int row_dst = blockDim.x*blockIdx.x + threadIdx.x; - if (i0 >= n_dims) { - const int i = row_dst*ne0 + i0; - - dst[i + 0] = x[i + 0]; - dst[i + 1] = x[i + 1]; - - return; - } - const int row_x = row_dst % ne1; const int channel_x = row_dst / ne1; const int idst = row_dst*ne0 + i0; const int ix = channel_x*s2 + row_x*s1 + i0; + if (i0 >= n_dims) { + dst[idst + 0] = x[ix + 0]; + dst[idst + 1] = x[ix + 1]; + + return; + } + const float theta_base = pos[channel_x]*powf(theta_scale, i0/2.0f); const float freq_factor = has_ff ? freq_factors[i0/2] : 1.0f; @@ -94,21 +92,19 @@ static __global__ void rope_neox( const int row_dst = blockDim.x*blockIdx.x + threadIdx.x; - if (i0 >= n_dims) { - const int i = row_dst*ne0 + i0; - - dst[i + 0] = x[i + 0]; - dst[i + 1] = x[i + 1]; - - return; - } - const int row_x = row_dst % ne1; const int channel_x = row_dst / ne1; const int idst = row_dst*ne0 + i0/2; const int ix = channel_x*s2 + row_x*s1 + i0/2; + if (i0 >= n_dims) { + dst[idst + i0/2 + 0] = x[ix + i0/2 + 0]; + dst[idst + i0/2 + 1] = x[ix + i0/2 + 1]; + + return; + } + const float theta_base = pos[channel_x]*powf(theta_scale, i0/2.0f); const float freq_factor = has_ff ? 
freq_factors[i0/2] : 1.0f; @@ -138,21 +134,19 @@ static __global__ void rope_multi( const int row_dst = blockDim.x*blockIdx.x + threadIdx.x; - if (i0 >= n_dims) { - const int i = row_dst*ne0 + i0; - - dst[i + 0] = x[i + 0]; - dst[i + 1] = x[i + 1]; - - return; - } - const int row_x = row_dst % ne1; const int channel_x = row_dst / ne1; const int idst = row_dst*ne0 + i0/2; const int ix = channel_x*s2 + row_x*s1 + i0/2; + if (i0 >= n_dims) { + dst[idst + i0/2 + 0] = x[ix + i0/2 + 0]; + dst[idst + i0/2 + 1] = x[ix + i0/2 + 1]; + + return; + } + const int sect_dims = sections.v[0] + sections.v[1] + sections.v[2] + sections.v[3]; const int sec_w = sections.v[1] + sections.v[0]; const int sector = (i0 / 2) % sect_dims; diff --git a/ggml/src/ggml-cuda/scale.cu b/ggml/src/ggml-cuda/scale.cu index 1405e066e86a2..2ee9e588992f4 100644 --- a/ggml/src/ggml-cuda/scale.cu +++ b/ggml/src/ggml-cuda/scale.cu @@ -1,18 +1,18 @@ #include "scale.cuh" -static __global__ void scale_f32(const float * x, float * dst, const float scale, const int k) { +static __global__ void scale_f32(const float * x, float * dst, const float scale, const float bias, const int k) { const int i = blockDim.x*blockIdx.x + threadIdx.x; if (i >= k) { return; } - dst[i] = scale * x[i]; + dst[i] = scale * x[i] + bias; } -static void scale_f32_cuda(const float * x, float * dst, const float scale, const int k, cudaStream_t stream) { +static void scale_f32_cuda(const float * x, float * dst, const float scale, const float bias, const int k, cudaStream_t stream) { const int num_blocks = (k + CUDA_SCALE_BLOCK_SIZE - 1) / CUDA_SCALE_BLOCK_SIZE; - scale_f32<<>>(x, dst, scale, k); + scale_f32<<>>(x, dst, scale, bias, k); } void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { @@ -25,7 +25,9 @@ void ggml_cuda_op_scale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { GGML_ASSERT( dst->type == GGML_TYPE_F32); float scale; - memcpy(&scale, dst->op_params, sizeof(float)); + float bias; + 
memcpy(&scale, (float *) dst->op_params + 0, sizeof(float)); + memcpy(&bias, (float *) dst->op_params + 1, sizeof(float)); - scale_f32_cuda(src0_d, dst_d, scale, ggml_nelements(src0), stream); + scale_f32_cuda(src0_d, dst_d, scale, bias, ggml_nelements(src0), stream); } diff --git a/ggml/src/ggml-cuda/set-rows.cu b/ggml/src/ggml-cuda/set-rows.cu new file mode 100644 index 0000000000000..58cee9244018f --- /dev/null +++ b/ggml/src/ggml-cuda/set-rows.cu @@ -0,0 +1,151 @@ +#include "set-rows.cuh" + +typedef void (*set_rows_kernel_t)(const char * src, char * dst); + +template +__device__ void set_rows_1(const src_t * src_f, dst_t * dst_f) { + GGML_UNUSED(src_f); + GGML_UNUSED(dst_f); +} + +template<> +__device__ __forceinline__ void set_rows_1(const float * src_f, half * dst_h) { + *dst_h = __float2half(*src_f); +} + +template<> +__device__ __forceinline__ void set_rows_1(const float * src_f, nv_bfloat16 * dst_b) { + *dst_b = *src_f; +} + +template<> +__device__ __forceinline__ void set_rows_1(const float * src_f, float * dst_f) { + *dst_f = *src_f; +} + +template +static __global__ void k_set_rows( + const src_t * __restrict__ src0, const int64_t * __restrict__ src1, dst_t * __restrict__ dst, + const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t ne03, + const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t ne13, + const int64_t s01, const int64_t s02, const int64_t s03, + const int64_t s10, const int64_t s11, const int64_t s12, + const int64_t s1, const int64_t s2, const int64_t s3) { + + const int64_t i = int64_t(blockDim.x) * blockIdx.x + threadIdx.x; + const int64_t ne_total = ne00 * ne01 * ne02 * ne03; + + if (i >= ne_total) { + return; + } + + const int64_t i03 = i / (ne00 * ne01 * ne02); + const int64_t i02 = (i - i03 * ne00 * ne01 * ne02) / (ne00 * ne01); + const int64_t i01 = (i - i03 * ne00 * ne01 * ne02 - i02 * ne00 * ne01) / ne00; + const int64_t i00 = i - i03 * ne00 * ne01 * ne02 - i02 * ne00 * ne01 - i01 * ne00; + 
+ const int64_t i12 = i03 % ne12; + const int64_t i11 = i02 % ne11; + const int64_t i10 = i01; + + const int64_t dst_row = *(src1 + i10*s10 + i11*s11 + i12*s12); + + const src_t * src0_row = src0 + i01*s01 + i02*s02 + i03*s03; + dst_t * dst_row_ptr = dst + dst_row*s1 + i02*s2 + i03*s3; + + const src_t* src_elem = src0_row + i00; + dst_t* dst_elem = dst_row_ptr + i00; + set_rows_1(src_elem, dst_elem); + + GGML_UNUSED(ne10); + GGML_UNUSED(ne13); +} + +template +static void set_rows_cuda( + const src_t * src0_d, const int64_t * src1_d, dst_t * dst_d, + const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t ne03, + const int64_t ne10, const int64_t ne11, const int64_t ne12, const int64_t ne13, + const size_t nb01, const size_t nb02, const size_t nb03, + const size_t nb10, const size_t nb11, const size_t nb12, + const size_t nb1, const size_t nb2, const size_t nb3, + cudaStream_t stream) { + + const int64_t ne_total = ne00 * ne01 * ne02 * ne03; + const int num_blocks = (ne_total + CUDA_SET_ROWS_BLOCK_SIZE - 1) / CUDA_SET_ROWS_BLOCK_SIZE; + const dim3 block_size(CUDA_SET_ROWS_BLOCK_SIZE); + const dim3 grid_size(num_blocks); + + + const int64_t s01 = nb01/sizeof(src_t); + const int64_t s02 = nb02/sizeof(src_t); + const int64_t s03 = nb03/sizeof(src_t); + const int64_t s10 = nb10/sizeof(int64_t); + const int64_t s11 = nb11/sizeof(int64_t); + const int64_t s12 = nb12/sizeof(int64_t); + const int64_t s1 = nb1/sizeof(dst_t); + const int64_t s2 = nb2/sizeof(dst_t); + const int64_t s3 = nb3/sizeof(dst_t); + + if (ne_total > 0) { + k_set_rows<<>>( + src0_d, src1_d, dst_d, + ne00, ne01, ne02, ne03, + ne10, ne11, ne12, ne13, + s01, s02, s03, + s10, s11, s12, + s1, s2, s3); + } +} + + +void ggml_cuda_op_set_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + + GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(src1->type == GGML_TYPE_I64); + + 
GGML_TENSOR_BINARY_OP_LOCALS + + const float * src0_d = (const float *)src0->data; + const int64_t * src1_d = (const int64_t *)src1->data; + + cudaStream_t stream = ctx.stream(); + + + + if (dst->type == GGML_TYPE_F32) { + set_rows_cuda( + src0_d, src1_d, (float*)dst->data, + ne00, ne01, ne02, ne03, + ne10, ne11, ne12, ne13, + nb01, nb02, nb03, + nb10, nb11, nb12, + nb1, nb2, nb3, + stream + ); + } else if (dst->type == GGML_TYPE_F16) { + set_rows_cuda( + src0_d, src1_d, (half*)dst->data, + ne00, ne01, ne02, ne03, + ne10, ne11, ne12, ne13, + nb01, nb02, nb03, + nb10, nb11, nb12, + nb1, nb2, nb3, + stream + ); + } else if (dst->type == GGML_TYPE_BF16) { + set_rows_cuda( + src0_d, src1_d, (nv_bfloat16*)dst->data, + ne00, ne01, ne02, ne03, + ne10, ne11, ne12, ne13, + nb01, nb02, nb03, + nb10, nb11, nb12, + nb1, nb2, nb3, + stream + ); + } else { + GGML_ABORT("unsupported type"); + } +} diff --git a/ggml/src/ggml-cuda/set-rows.cuh b/ggml/src/ggml-cuda/set-rows.cuh new file mode 100644 index 0000000000000..c140c0873c8a8 --- /dev/null +++ b/ggml/src/ggml-cuda/set-rows.cuh @@ -0,0 +1,7 @@ +#pragma once + +#include "common.cuh" + +#define CUDA_SET_ROWS_BLOCK_SIZE 256 + +void ggml_cuda_op_set_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/softmax.cu b/ggml/src/ggml-cuda/softmax.cu index aac6e0999880a..14543e978cf0f 100644 --- a/ggml/src/ggml-cuda/softmax.cu +++ b/ggml/src/ggml-cuda/softmax.cu @@ -2,6 +2,7 @@ #include "ggml.h" #include "softmax.cuh" #include +#include template static __device__ __forceinline__ float t2f32(T val) { @@ -13,6 +14,29 @@ __device__ float __forceinline__ t2f32(half val) { return __half2float(val); } +struct soft_max_params { + + int64_t nheads; + uint32_t n_head_log2; + int64_t ncols; + int64_t nrows_x; + int64_t nrows_y; + int64_t ne00; + int64_t ne01; + int64_t ne02; + int64_t ne03; + int64_t nb11; + int64_t nb12; + int64_t nb13; + + int64_t ne12; + int64_t ne13; + float scale; + float max_bias; + float 
m0; + float m1; +}; + // When ncols_template == 0 the bounds for the loops in this function are not known and can't be unrolled. // As we want to keep pragma unroll for all other cases we supress the clang transformation warning here. #ifdef __clang__ @@ -21,16 +45,24 @@ __device__ float __forceinline__ t2f32(half val) { #endif // __clang__ template static __global__ void soft_max_f32( - const float * x, const T * mask, float * dst, const int ncols_par, const int nrows_y, - const float scale, const float max_bias, const float m0, const float m1, uint32_t n_head_log2) { - const int ncols = ncols_template == 0 ? ncols_par : ncols_template; + const float * x, const T * mask, float * dst, const soft_max_params p) { + const int ncols = ncols_template == 0 ? p.ncols : ncols_template; const int tid = threadIdx.x; - const int rowx = blockIdx.x; - const int rowy = rowx % nrows_y; // broadcast the mask in the row dimension + + const int64_t i03 = blockIdx.z; + const int64_t i02 = blockIdx.y; + const int64_t i01 = blockIdx.x; + + //TODO: noncontigous inputs/outputs + const int rowx = blockIdx.x + blockIdx.y * gridDim.x + blockIdx.z * gridDim.x * gridDim.y; + + const int64_t i11 = i01; + const int64_t i12 = i02 % p.ne12; + const int64_t i13 = i03 % p.ne13; x += int64_t(rowx)*ncols; - mask += int64_t(rowy)*ncols * (mask != nullptr); + mask += (i11*p.nb11 + i12*p.nb12 + i13*p.nb13) / sizeof(T) * (mask != nullptr); dst += int64_t(rowx)*ncols; const int block_size = block_size_template == 0 ? 
blockDim.x : block_size_template; @@ -38,7 +70,7 @@ static __global__ void soft_max_f32( const int warp_id = threadIdx.x / WARP_SIZE; const int lane_id = threadIdx.x % WARP_SIZE; - const float slope = get_alibi_slope(max_bias, rowx/nrows_y, n_head_log2, m0, m1); + const float slope = get_alibi_slope(p.max_bias, i02, p.n_head_log2, p.m0, p.m1); extern __shared__ float data_soft_max_f32[]; float * buf_iw = data_soft_max_f32; // shared memory buffer for inter-warp communication @@ -55,7 +87,7 @@ static __global__ void soft_max_f32( break; } - const float val = x[col]*scale + (mask ? slope*t2f32(mask[col]) : 0.0f); + const float val = x[col]*p.scale + (mask ? slope*t2f32(mask[col]) : 0.0f); vals[col] = val; max_val = max(max_val, val); @@ -150,64 +182,58 @@ static __global__ void soft_max_back_f32( } } +template +static void launch_soft_max_kernels(const float * x, const T * mask, float * dst, + const soft_max_params & p, cudaStream_t stream, dim3 block_dims, dim3 block_nums, size_t nbytes_shared) +{ + const int id = ggml_cuda_get_device(); + const size_t smpbo = ggml_cuda_info().devices[id].smpbo; + + auto launch_kernel = [=](auto I) -> bool { + constexpr int ncols = decltype(I)::value; + constexpr int block = (ncols > 1024 ? 
1024 : ncols); + + if (p.ncols == ncols) { + CUDA_SET_SHARED_MEMORY_LIMIT((soft_max_f32), smpbo); + soft_max_f32<<>> + (x, mask, dst, p); + return true; + } + return false; + }; + + // unary fold over launch_kernel + if ((launch_kernel(std::integral_constant{}) || ...)) { + return; + } + + //default case + CUDA_SET_SHARED_MEMORY_LIMIT((soft_max_f32), smpbo); + soft_max_f32<<>>(x, mask, dst, p); +} + + template -static void soft_max_f32_cuda(const float * x, const T * mask, float * dst, const int ncols_x, const int nrows_x, const int nrows_y, const float scale, const float max_bias, cudaStream_t stream) { +static void soft_max_f32_cuda(const float * x, const T * mask, float * dst, const soft_max_params & params, cudaStream_t stream) { int nth = WARP_SIZE; + const int64_t ncols_x = params.ncols; + while (nth < ncols_x && nth < CUDA_SOFT_MAX_BLOCK_SIZE) nth *= 2; const dim3 block_dims(nth, 1, 1); - const dim3 block_nums(nrows_x, 1, 1); + const dim3 block_nums(params.ne01, params.ne02, params.ne03); const size_t nbytes_shared = (GGML_PAD(ncols_x, WARP_SIZE) + WARP_SIZE)*sizeof(float); static_assert(CUDA_SOFT_MAX_BLOCK_SIZE == 1024, "These values need to be adjusted."); - const uint32_t n_head = nrows_x/nrows_y; - const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head)); - const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); - const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); + const int id = ggml_cuda_get_device(); + const size_t smpbo = ggml_cuda_info().devices[id].smpbo; - // FIXME: this limit could be raised by ~2-4x on Ampere or newer - if (nbytes_shared < ggml_cuda_info().devices[ggml_cuda_get_device()].smpb) { - switch (ncols_x) { - case 32: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - case 64: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - case 128: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, 
max_bias, m0, m1, n_head_log2); - break; - case 256: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - case 512: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - case 1024: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - case 2048: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - case 4096: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - default: - soft_max_f32<<>> - (x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); - break; - } + + if (nbytes_shared <= smpbo) { + launch_soft_max_kernels<32, 64, 128, 256, 512, 1024, 2048, 4096>(x, mask, dst, params, stream, block_dims, block_nums, nbytes_shared); } else { const size_t nbytes_shared_low = WARP_SIZE*sizeof(float); - soft_max_f32<<>>(x, mask, dst, ncols_x, nrows_y, scale, max_bias, m0, m1, n_head_log2); + soft_max_f32<<>>(x, mask, dst, params); } } @@ -235,10 +261,11 @@ void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { GGML_ASSERT(!src1 || src1->type == GGML_TYPE_F16 || src1->type == GGML_TYPE_F32); // src1 contains mask and it is optional - const int64_t ne00 = src0->ne[0]; const int64_t nrows_x = ggml_nrows(src0); const int64_t nrows_y = src0->ne[1]; + const int64_t ne00 = src0->ne[0]; + float scale = 1.0f; float max_bias = 0.0f; @@ -247,10 +274,44 @@ void ggml_cuda_op_soft_max(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const bool use_f16 = (src1 && src1->type == GGML_TYPE_F16); + const int64_t nb11 = src1 ? src1->nb[1] : 1; + const int64_t nb12 = src1 ? src1->nb[2] : 1; + const int64_t nb13 = src1 ? src1->nb[3] : 1; + + const int64_t ne12 = src1 ? src1->ne[2] : 1; + const int64_t ne13 = src1 ? 
src1->ne[3] : 1; + + const uint32_t n_head = src0->ne[2]; + const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head)); + + const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); + const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); + + + soft_max_params params = {}; + params.nheads = src0->ne[2]; + params.n_head_log2 = n_head_log2; + params.ncols = ne00; + params.nrows_x = nrows_x; + params.nrows_y = nrows_y; + params.ne00 = src0->ne[0]; + params.ne01 = src0->ne[1]; + params.ne02 = src0->ne[2]; + params.ne03 = src0->ne[3]; + params.nb11 = nb11; + params.nb12 = nb12; + params.nb13 = nb13; + params.ne12 = ne12; + params.ne13 = ne13; + params.scale = scale; + params.max_bias = max_bias; + params.m0 = m0; + params.m1 = m1; + if (use_f16) { - soft_max_f32_cuda(src0_d, (const half *) src1_d, dst_d, ne00, nrows_x, nrows_y, scale, max_bias, stream); + soft_max_f32_cuda(src0_d, (const half *) src1_d, dst_d, params, stream); } else { - soft_max_f32_cuda(src0_d, (const float *) src1_d, dst_d, ne00, nrows_x, nrows_y, scale, max_bias, stream); + soft_max_f32_cuda(src0_d, (const float *) src1_d, dst_d, params, stream); } } diff --git a/ggml/src/ggml-cuda/ssm-conv.cu b/ggml/src/ggml-cuda/ssm-conv.cu index f637571963730..41979733601d2 100644 --- a/ggml/src/ggml-cuda/ssm-conv.cu +++ b/ggml/src/ggml-cuda/ssm-conv.cu @@ -107,8 +107,11 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int if (nc == 4) { ssm_conv_f32<<>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t); + } else if (nc == 3) { + ssm_conv_f32<<>>(src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, + dst, dst_nb0, dst_nb1, dst_nb2, n_t); } else { - GGML_ABORT("Only support kernel size = 4 now."); + GGML_ABORT("Only support kernel size = 3 or size = 4 right now."); } } else { if (nc == 4) { @@ -116,8 +119,13 @@ static void ssm_conv_f32_cuda(const float * src0, const float * src1, const int dim3 blocks(n_s, (nr + threads - 
1) / threads, (n_t + split_n_t - 1) / split_n_t); ssm_conv_long_token_f32<<>>( src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t); + } else if (nc == 3) { + const int64_t split_n_t = 32; + dim3 blocks(n_s, (nr + threads - 1) / threads, (n_t + split_n_t - 1) / split_n_t); + ssm_conv_long_token_f32<<>>( + src0, src1, src0_nb0, src0_nb1, src0_nb2, src1_nb1, dst, dst_nb0, dst_nb1, dst_nb2, n_t); } else { - GGML_ABORT("Only support kernel size = 4 right now."); + GGML_ABORT("Only support kernel size = 3 or size = 4 right now."); } } } diff --git a/ggml/src/ggml-cuda/ssm-scan.cu b/ggml/src/ggml-cuda/ssm-scan.cu index 37ee208c09d46..c9184398b422c 100644 --- a/ggml/src/ggml-cuda/ssm-scan.cu +++ b/ggml/src/ggml-cuda/ssm-scan.cu @@ -4,14 +4,15 @@ template __global__ void __launch_bounds__(splitD, 2) ssm_scan_f32(const float * __restrict__ src0, const float * __restrict__ src1, const float * __restrict__ src2, const float * __restrict__ src3, const float * __restrict__ src4, const float * __restrict__ src5, - const int src0_nb1, const int src0_nb2, const int src1_nb0, const int src1_nb1, const int src1_nb2, - const int src1_nb3, const int src2_nb0, const int src2_nb1, const int src2_nb2, const int src3_nb1, - const int src4_nb1, const int src4_nb2, const int src5_nb1, const int src5_nb2, - float * __restrict__ dst, const int64_t L) { - GGML_UNUSED(src1_nb0); - GGML_UNUSED(src2_nb0); - const int bidx = blockIdx.x; // split along B - const int bidy = blockIdx.y; // split along D + const int32_t * __restrict__ src6, float * __restrict__ dst, + const int src0_nb2, const int src0_nb3, const int src1_nb2, const int src1_nb3, + const int src2_nb1, const int src2_nb2, const int src3_nb1, + const int src4_nb2, const int src4_nb3, const int src5_nb2, const int src5_nb3, + const int64_t s_off, const int64_t d_inner, const int64_t L) { + + constexpr int warp_size = ggml_cuda_get_physical_warp_size(); + const int bidx = blockIdx.x; // split along B 
(sequences) + const int bidy = blockIdx.y; // split along D (d_inner) const int tid = threadIdx.x; const int wid = tid / 32; const int wtid = tid % 32; @@ -22,38 +23,38 @@ __global__ void __launch_bounds__(splitD, 2) float * smem_A = smem; float * smem_s0 = smem_A + splitD * stride_sA; - const float * s0_block = (const float *) ((const char *) src0 + bidx * src0_nb2 + bidy * splitD * src0_nb1); - const float * x_block = (const float *) ((const char *) src1 + (bidx * src1_nb2) + bidy * splitD * sizeof(float)); + const float * s0_block = (const float *) ((const char *) src0 + src6[bidx] * src0_nb3 + bidy * splitD * src0_nb2); + const float * x_block = (const float *) ((const char *) src1 + (bidx * src1_nb3) + bidy * splitD * sizeof(float)); const float * dt_block = (const float *) ((const char *) src2 + (bidx * src2_nb2) + bidy * splitD * sizeof(float)); const float * A_block = (const float *) ((const char *) src3 + bidy * splitD * src3_nb1); - const float * B_block = (const float *) ((const char *) src4 + (bidx * src4_nb2)); - const float * C_block = (const float *) ((const char *) src5 + (bidx * src5_nb2)); - float * y_block = (float *) ((char *) dst + (bidx * src1_nb2) + bidy * splitD * sizeof(float)); - float * s_block = (float *) ((char *) dst + src1_nb3 + bidx * src0_nb2 + bidy * splitD * src0_nb1); + const float * B_block = (const float *) ((const char *) src4 + (bidx * src4_nb3)); + const float * C_block = (const float *) ((const char *) src5 + (bidx * src5_nb3)); + float * y_block = (float *) ((char *) dst + (bidx * d_inner * L * sizeof(float)) + bidy * splitD * sizeof(float)); + float * s_block = (float *) ((char *) dst + s_off + bidx * src0_nb3 + bidy * splitD * src0_nb2); - const int stride_s0 = src0_nb1 / sizeof(float); - const int stride_x = src1_nb1 / sizeof(float); + const int stride_s0 = src0_nb2 / sizeof(float); + const int stride_x = src1_nb2 / sizeof(float); const int stride_dt = src2_nb1 / sizeof(float); const int stride_A = src3_nb1 / 
sizeof(float); - const int stride_B = src4_nb1 / sizeof(float); - const int stride_C = src5_nb1 / sizeof(float); + const int stride_B = src4_nb2 / sizeof(float); + const int stride_C = src5_nb2 / sizeof(float); const int stride_s = stride_s0; - const int stride_y = stride_x; + const int stride_y = d_inner; // can N not be 16? for example 32? if (N == 16) { #pragma unroll for (size_t i = 0; i < splitD / 4; i += 2) { - float value = A_block[(wid * warpSize + i) * stride_A + wtid]; + float value = A_block[(wid * warp_size + i) * stride_A + wtid]; // todo: bank conflict // I am always confused with how to use the swizzling method to solve // bank conflit. Hoping somebody can tell me. - smem_A[(wid * warpSize + i) * stride_sA + wtid + ((wtid / 16) > 0 ? 1 : 0)] = value; + smem_A[(wid * warp_size + i) * stride_sA + wtid + ((wtid / 16) > 0 ? 1 : 0)] = value; } #pragma unroll for (size_t i = 0; i < splitD / 4; i += 2) { - float value = s0_block[(wid * warpSize + i) * stride_s0 + wtid]; - smem_s0[(wid * warpSize + i) * stride_ss0 + wtid + ((wtid / 16) > 0 ? 1 : 0)] = value; + float value = s0_block[(wid * warp_size + i) * stride_s0 + wtid]; + smem_s0[(wid * warp_size + i) * stride_ss0 + wtid + ((wtid / 16) > 0 ? 
1 : 0)] = value; } } @@ -82,24 +83,167 @@ __global__ void __launch_bounds__(splitD, 2) } } +// assumes as many threads as d_state +template +__global__ void __launch_bounds__(d_state, 1) + ssm_scan_f32_group( + const float * __restrict__ src0, const float * __restrict__ src1, const float * __restrict__ src2, + const float * __restrict__ src3, const float * __restrict__ src4, const float * __restrict__ src5, + const int32_t * __restrict__ src6, float * __restrict__ dst, + const int src0_nb2, const int src0_nb3, const int src1_nb2, const int src1_nb3, + const int src2_nb1, const int src2_nb2, const int src3_nb1, + const int src4_nb2, const int src4_nb3, const int src5_nb2, const int src5_nb3, + const int64_t s_off, const int64_t n_head, const int64_t d_head, const int64_t n_group, const int64_t n_tok) { + + const int head_idx = (blockIdx.x * splitH) / d_head; + const int head_off = ((blockIdx.x * splitH) % d_head) * sizeof(float); + const int seq_idx = blockIdx.y; + + const int group_off = (head_idx & (n_group - 1)) * d_state * sizeof(float); + + const float * s0_block = (const float *) ((const char *) src0 + src6[seq_idx] * src0_nb3 + head_idx * src0_nb2 + head_off * d_state); + const float * x_block = (const float *) ((const char *) src1 + (seq_idx * src1_nb3) + blockIdx.x * splitH * sizeof(float)); + const float * dt_block = (const float *) ((const char *) src2 + (seq_idx * src2_nb2) + head_idx * sizeof(float)); + const float * A_block = (const float *) ((const char *) src3 + head_idx * src3_nb1); + const float * B_block = (const float *) ((const char *) src4 + (seq_idx * src4_nb3) + (group_off)); + const float * C_block = (const float *) ((const char *) src5 + (seq_idx * src5_nb3) + (group_off)); + float * y_block = dst + (seq_idx * n_tok * n_head * d_head) + blockIdx.x * splitH; + float * s_block = (float *) ((char *) dst + s_off + seq_idx * src0_nb3 + head_idx * src0_nb2 + head_off * d_state); + + // strides across n_seq_tokens + const int stride_x = src1_nb2 / 
sizeof(float); + const int stride_dt = src2_nb1 / sizeof(float); + const int stride_B = src4_nb2 / sizeof(float); + const int stride_C = src5_nb2 / sizeof(float); + const int stride_y = n_head * d_head; + + float state[splitH]; + // for the parallel accumulation + __shared__ float stateC[splitH * d_state]; + +#pragma unroll + for (int j = 0; j < splitH; j++) { + state[j] = s0_block[j * d_state + threadIdx.x]; + } + + for (int64_t i = 0; i < n_tok; i++) { + // TODO: only calculate dA and dt_soft_plus once per head instead of every splitH head elements + // TODO: only calculate B and C once per head group + // NOTE: dt_soft_plus, dA and x_dt have the same value across threads here. + float dt_soft_plus = dt_block[i * stride_dt]; + if (dt_soft_plus <= 20.0f) { + dt_soft_plus = log1pf(expf(dt_soft_plus)); + } + const float dA = expf(dt_soft_plus * A_block[0]); + const float B = B_block[i * stride_B + threadIdx.x]; + const float C = C_block[i * stride_C + threadIdx.x]; + + // across d_head +#pragma unroll + for (int j = 0; j < splitH; j++) { + const float x_dt = x_block[i * stride_x + j] * dt_soft_plus; + + state[j] = (state[j] * dA) + (B * x_dt); + + stateC[j * d_state + threadIdx.x] = state[j] * C; + } + + __syncthreads(); + + // parallel accumulation for stateC + // TODO: simplify + { + static_assert((d_state & -d_state) == d_state, "the state size has to be a power of 2"); + static_assert((splitH & -splitH) == splitH, "splitH has to be a power of 2"); + + // reduce until w matches the warp size + // TODO: does this work even when the physical warp size is 64? 
+#pragma unroll + for (int w = d_state; w > WARP_SIZE; w >>= 1) { + // (assuming there are d_state threads) +#pragma unroll + for (int j = 0; j < ((w >> 1) * splitH + d_state - 1) / d_state; j++) { + // TODO: check for bank conflicts + const int k = (threadIdx.x % (w >> 1)) + (d_state * (threadIdx.x / (w >> 1))) + j * d_state * (d_state / (w >> 1)); + stateC[k] += stateC[k + (w >> 1)]; + + } + __syncthreads(); + } + + static_assert(splitH >= d_state / WARP_SIZE); + +#pragma unroll + for (int j = 0; j < splitH / (d_state / WARP_SIZE); j++) { + float y = stateC[(threadIdx.x % WARP_SIZE) + d_state * (threadIdx.x / WARP_SIZE) + j * d_state * (d_state / WARP_SIZE)]; + y = warp_reduce_sum(y); + + // store the above accumulations + if (threadIdx.x % WARP_SIZE == 0) { + const int k = threadIdx.x / WARP_SIZE + j * (d_state / WARP_SIZE); + y_block[i * stride_y + k] = y; + } + } + } + } + + // write back the state +#pragma unroll + for (int j = 0; j < splitH; j++) { + s_block[j * d_state + threadIdx.x] = state[j]; + } +} + static void ssm_scan_f32_cuda(const float * src0, const float * src1, const float * src2, const float * src3, - const float * src4, const float * src5, const int src0_nb1, const int src0_nb2, - const int src1_nb0, const int src1_nb1, const int src1_nb2, const int src1_nb3, - const int src2_nb0, const int src2_nb1, const int src2_nb2, const int src3_nb1, - const int src4_nb1, const int src4_nb2, const int src5_nb1, const int src5_nb2, - float * dst, const int64_t N, const int64_t D, const int64_t L, const int64_t B, + const float * src4, const float * src5, const int32_t * src6, float * dst, + const int src0_nb2, const int src0_nb3, const int src1_nb2, const int src1_nb3, const int src2_nb1, + const int src2_nb2, const int src3_nb1, const int src4_nb2, const int src4_nb3, const int src5_nb2, + const int src5_nb3, const int64_t s_off, const int64_t d_state, const int64_t head_dim, + const int64_t n_head, const int64_t n_group, const int64_t n_tok, const 
int64_t n_seq, cudaStream_t stream) { - const int threads = 128; - // todo: consider D cannot be divided,does this situation exist? - GGML_ASSERT(D % threads == 0); - const dim3 blocks(B, (D + threads - 1) / threads, 1); - const int smem_size = (threads * (N + 1) * 2) * sizeof(float); - if (N == 16) { - ssm_scan_f32<128, 16><<>>( - src0, src1, src2, src3, src4, src5, src0_nb1, src0_nb2, src1_nb0, src1_nb1, src1_nb2, src1_nb3, src2_nb0, - src2_nb1, src2_nb2, src3_nb1, src4_nb1, src4_nb2, src5_nb1, src5_nb2, dst, L); + // NOTE: if you change conditions here, be sure to update the corresponding supports_op condition! + if (src3_nb1 == sizeof(float)) { + // Mamba-2 + if (d_state == 128) { + const int threads = 128; + GGML_ASSERT(d_state % threads == 0); + // NOTE: can be any power of two between 4 and 64 + const int splitH = 16; + GGML_ASSERT(head_dim % splitH == 0); + const dim3 blocks((n_head * head_dim + (splitH - 1)) / splitH, n_seq, 1); + ssm_scan_f32_group<16, 128><<>>( + src0, src1, src2, src3, src4, src5, src6, dst, + src0_nb2, src0_nb3, src1_nb2, src1_nb3, src2_nb1, src2_nb2, src3_nb1, + src4_nb2, src4_nb3, src5_nb2, src5_nb3, s_off, n_head, head_dim, n_group, n_tok); + } else if (d_state == 256) { // Falcon-H1 + const int threads = 256; + // NOTE: can be any power of two between 8 and 64 + const int splitH = 16; + GGML_ASSERT(head_dim % splitH == 0); + const dim3 blocks((n_head * head_dim + (splitH - 1)) / splitH, n_seq, 1); + ssm_scan_f32_group<16, 256><<>>( + src0, src1, src2, src3, src4, src5, src6, dst, + src0_nb2, src0_nb3, src1_nb2, src1_nb3, src2_nb1, src2_nb2, src3_nb1, + src4_nb2, src4_nb3, src5_nb2, src5_nb3, s_off, n_head, head_dim, n_group, n_tok); + } else { + GGML_ABORT("doesn't support d_state!=(128 or 256)."); + } } else { - GGML_ABORT("doesn't support N!=16."); + const int threads = 128; + // Mamba-1 + GGML_ASSERT(n_head % threads == 0); + GGML_ASSERT(head_dim == 1); + GGML_ASSERT(n_group == 1); + const dim3 blocks(n_seq, (n_head + threads - 
1) / threads, 1); + const int smem_size = (threads * (d_state + 1) * 2) * sizeof(float); + if (d_state == 16) { + ssm_scan_f32<128, 16><<>>( + src0, src1, src2, src3, src4, src5, src6, dst, + src0_nb2, src0_nb3, src1_nb2, src1_nb3, src2_nb1, src2_nb2, + src3_nb1, src4_nb2, src4_nb3, src5_nb2, src5_nb3, s_off, n_head, n_tok); + } else { + GGML_ABORT("doesn't support d_state!=16."); + } } } @@ -110,30 +254,25 @@ void ggml_cuda_op_ssm_scan(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const struct ggml_tensor * src3 = dst->src[3]; // A const struct ggml_tensor * src4 = dst->src[4]; // B const struct ggml_tensor * src5 = dst->src[5]; // C - - // const int64_t d_state = src0->ne[0]; - // const int64_t d_inner = src0->ne[1]; - // const int64_t l = src1->ne[1]; - // const int64_t b = src0->ne[2]; + const struct ggml_tensor * src6 = dst->src[6]; // ids const int64_t nc = src0->ne[0]; // d_state - const int64_t nr = src0->ne[1]; // d_inner - const int64_t n_t = src1->ne[1]; // number of tokens per sequence - const int64_t n_s = src0->ne[2]; // number of sequences in the batch + const int64_t nr = src0->ne[1]; // head_dim or 1 + const int64_t nh = src1->ne[1]; // n_head + const int64_t ng = src4->ne[1]; // n_group + const int64_t n_t = src1->ne[2]; // number of tokens per sequence + const int64_t n_s = src1->ne[3]; // number of sequences in the batch + + const int64_t s_off = ggml_nelements(src1) * sizeof(float); - GGML_ASSERT(ggml_nelements(src1) + ggml_nelements(src0) == ggml_nelements(dst)); + GGML_ASSERT(ggml_nelements(src1) + nc*nr*nh*n_s == ggml_nelements(dst)); GGML_ASSERT(src0->nb[0] == sizeof(float)); GGML_ASSERT(src1->nb[0] == sizeof(float)); GGML_ASSERT(src2->nb[0] == sizeof(float)); GGML_ASSERT(src3->nb[0] == sizeof(float)); GGML_ASSERT(src4->nb[0] == sizeof(float)); GGML_ASSERT(src5->nb[0] == sizeof(float)); - // required for the dot product between s and C - GGML_ASSERT(src0->nb[1] == src0->ne[0] * sizeof(float)); - // required for per-sequence offsets 
for states - GGML_ASSERT(src0->nb[2] == src0->ne[0] * src0->ne[1] * sizeof(float)); - // required to get correct offset for state destination (i.e. src1->nb[3]) - GGML_ASSERT(src1->nb[3] == src1->ne[0] * src1->ne[1] * src1->ne[2] * sizeof(float)); + GGML_ASSERT(src6->nb[0] == sizeof(int32_t)); const float * src0_d = (const float *) src0->data; const float * src1_d = (const float *) src1->data; @@ -141,13 +280,16 @@ void ggml_cuda_op_ssm_scan(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const float * src3_d = (const float *) src3->data; const float * src4_d = (const float *) src4->data; const float * src5_d = (const float *) src5->data; + const int32_t * src6_d = (const int32_t *) src6->data; float * dst_d = (float *) dst->data; cudaStream_t stream = ctx.stream(); GGML_ASSERT(src0->type == GGML_TYPE_F32); + GGML_ASSERT(src6->type == GGML_TYPE_I32); GGML_ASSERT(dst->type == GGML_TYPE_F32); - ssm_scan_f32_cuda(src0_d, src1_d, src2_d, src3_d, src4_d, src5_d, src0->nb[1], src0->nb[2], src1->nb[0], - src1->nb[1], src1->nb[2], src1->nb[3], src2->nb[0], src2->nb[1], src2->nb[2], src3->nb[1], - src4->nb[1], src4->nb[2], src5->nb[1], src5->nb[2], dst_d, nc, nr, n_t, n_s, stream); + ssm_scan_f32_cuda(src0_d, src1_d, src2_d, src3_d, src4_d, src5_d, src6_d, dst_d, + src0->nb[2], src0->nb[3], src1->nb[2], src1->nb[3], src2->nb[1], src2->nb[2], + src3->nb[1], src4->nb[2], src4->nb[3], src5->nb[2], src5->nb[3], + s_off, nc, nr, nh, ng, n_t, n_s, stream); } diff --git a/ggml/src/ggml-cuda/sumrows.cu b/ggml/src/ggml-cuda/sumrows.cu index 38dbf1b5e1fa9..2eee08fa07375 100644 --- a/ggml/src/ggml-cuda/sumrows.cu +++ b/ggml/src/ggml-cuda/sumrows.cu @@ -1,25 +1,9 @@ #include "sumrows.cuh" -static __global__ void k_sum_rows_f32(const float * x, float * dst, const int ncols) { - const int row = blockIdx.x; - const int col = threadIdx.x; - - float sum = 0.0f; - for (int i = col; i < ncols; i += blockDim.x) { - sum += x[row * ncols + i]; - } - - sum = warp_reduce_sum(sum); - - if 
(col == 0) { - dst[row] = sum; - } -} - void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream) { const dim3 block_dims(WARP_SIZE, 1, 1); const dim3 block_nums(nrows, 1, 1); - k_sum_rows_f32<<>>(x, dst, ncols); + reduce_rows_f32<<>>(x, dst, ncols); } void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { @@ -35,5 +19,8 @@ void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const int64_t ncols = src0->ne[0]; const int64_t nrows = ggml_nrows(src0); - sum_rows_f32_cuda(src0_d, dst_d, ncols, nrows, stream); + const dim3 block_dims(WARP_SIZE, 1, 1); + const dim3 block_nums(nrows, 1, 1); + + reduce_rows_f32<<>>(src0_d, dst_d, ncols); } diff --git a/ggml/src/ggml-cuda/sumrows.cuh b/ggml/src/ggml-cuda/sumrows.cuh index 191db1c13167e..3431c599b1b89 100644 --- a/ggml/src/ggml-cuda/sumrows.cuh +++ b/ggml/src/ggml-cuda/sumrows.cuh @@ -1,5 +1,4 @@ #include "common.cuh" void sum_rows_f32_cuda(const float * x, float * dst, const int ncols, const int nrows, cudaStream_t stream); - void ggml_cuda_op_sum_rows(ggml_backend_cuda_context & ctx, ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/unary.cu b/ggml/src/ggml-cuda/unary.cu index 2c0375fbe3cf6..91c830c4dacc3 100644 --- a/ggml/src/ggml-cuda/unary.cu +++ b/ggml/src/ggml-cuda/unary.cu @@ -83,6 +83,10 @@ static __device__ __forceinline__ float op_log(float x) { return logf(x); } +static __device__ __forceinline__ float op_elu(float x) { + return (x > 0.f) ? 
x : expm1f(x); +} + template static __global__ void unary_op_kernel(const T * x, T * dst, const int k) { const int i = blockDim.x*blockIdx.x + threadIdx.x; @@ -196,6 +200,106 @@ void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { ggml_cuda_op_unary(ctx, dst); } +void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary(ctx, dst); +} +/* gated ops */ + +template +static __global__ void unary_gated_op_kernel(const T * x, const T * g, T * dst, const int64_t k, const int64_t n, const int64_t o0, const int64_t o1) { + const int64_t i = int64_t(blockDim.x)*blockIdx.x + threadIdx.x; + + if (i >= k) { + return; + } + + // perform base op and multiply with gate (either offset in same tensor or a separate one) + const int64_t j0 = (i / n) * o0 + (i % n); + const int64_t j1 = o0 == o1 ? j0 : (i / n) * o1 + (i % n); + + dst[i] = (T)(op((float)x[j0]) * (float)g[j1]); +} + +template +static void unary_gated_cuda(const T * x, const T * g, T * dst, const int64_t k, const int64_t n, const int64_t o0, const int64_t o1, cudaStream_t stream) { + const int64_t num_blocks = (k + CUDA_GLU_BLOCK_SIZE - 1) / CUDA_GLU_BLOCK_SIZE; + unary_gated_op_kernel<<>>(x, g, dst, k, n, o0, o1); +} + +template +void ggml_cuda_op_unary_gated(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + void * src0_d = src0->data; + void * src1_d = src1 ? src1->data : src0->data; + const int64_t src0_o = src0->nb[1]; + const int64_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + void * dst_d = dst->data; + const int64_t nc = src1 ? 
src0->ne[0] : src0->ne[0] / 2; + cudaStream_t stream = ctx.stream(); + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + GGML_ASSERT(src0->nb[0] == ggml_element_size(src0)); + GGML_ASSERT(ggml_is_contiguous(dst)); + + GGML_ASSERT(src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); + GGML_ASSERT( dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); + GGML_ASSERT(src0->type == dst->type); + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_nrows(dst) == ggml_nrows(src0)); + + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src1->nb[0] == ggml_element_size(src1)); + GGML_ASSERT(src1->ne[0] == nc); + GGML_ASSERT(src0->type == src1->type); + } + + const int32_t swapped = ((const int32_t *) dst->op_params)[1]; + + if (src0->type == GGML_TYPE_F16) { + half * src0_p = (half *) src0_d; + half * src1_p = (half *) src1_d; + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + + unary_gated_cuda(src0_p, src1_p, (half *)dst_d, ggml_nelements(dst), nc, src0_o / sizeof(half), src1_o / sizeof(half), stream); + } else { + float * src0_p = (float *) src0_d; + float * src1_p = (float *) src1_d; + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 
0 : nc; + } + + unary_gated_cuda(src0_p, src1_p, (float *)dst_d, ggml_nelements(dst), nc, src0_o / sizeof(float), src1_o / sizeof(float), stream); + } +} + +void ggml_cuda_op_reglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary_gated(ctx, dst); +} + +void ggml_cuda_op_geglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary_gated(ctx, dst); +} + +void ggml_cuda_op_swiglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary_gated(ctx, dst); +} + +void ggml_cuda_op_geglu_erf(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary_gated(ctx, dst); +} + +void ggml_cuda_op_geglu_quick(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { + ggml_cuda_op_unary_gated(ctx, dst); +} + /* silu_back */ static __device__ __forceinline__ float op_silu_back(float grad, float x) { diff --git a/ggml/src/ggml-cuda/unary.cuh b/ggml/src/ggml-cuda/unary.cuh index 6686fc17e9193..cb14d16f8f3f5 100644 --- a/ggml/src/ggml-cuda/unary.cuh +++ b/ggml/src/ggml-cuda/unary.cuh @@ -15,6 +15,7 @@ #define CUDA_SQRT_BLOCK_SIZE 256 #define CUDA_SIN_BLOCK_SIZE 256 #define CUDA_COS_BLOCK_SIZE 256 +#define CUDA_GLU_BLOCK_SIZE 256 void ggml_cuda_op_abs(ggml_backend_cuda_context & ctx, ggml_tensor * dst); @@ -57,3 +58,15 @@ void ggml_cuda_op_sin(ggml_backend_cuda_context & ctx, ggml_tensor * dst); void ggml_cuda_op_cos(ggml_backend_cuda_context & ctx, ggml_tensor * dst); void ggml_cuda_op_log(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_elu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_reglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_geglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_swiglu(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_geglu_erf(ggml_backend_cuda_context & ctx, ggml_tensor * dst); + +void ggml_cuda_op_geglu_quick(ggml_backend_cuda_context & ctx, 
ggml_tensor * dst); diff --git a/ggml/src/ggml-cuda/upscale.cu b/ggml/src/ggml-cuda/upscale.cu index 524e979574266..ef48aa5f97bcd 100644 --- a/ggml/src/ggml-cuda/upscale.cu +++ b/ggml/src/ggml-cuda/upscale.cu @@ -22,17 +22,88 @@ static __global__ void upscale_f32(const float * x, float * dst, dst[index] = *( (const float *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00) ); } +static __global__ void upscale_f32_bilinear(const float * x, float * dst, + const int nb00, const int nb01, const int nb02, const int nb03, + const int ne00_src, const int ne01_src, + const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst, + const float sf0, const float sf1, const float sf2, const float sf3, + const float pixel_offset) { + const int64_t index = threadIdx.x + blockIdx.x * blockDim.x; + const int64_t dst_total_elements = ne10_dst * ne11_dst * ne12_dst * ne13_dst; + + if (index >= dst_total_elements) { + return; + } + + const int i10_dst = index % ne10_dst; + const int i11_dst = (index / ne10_dst) % ne11_dst; + const int i12_dst = (index / (ne10_dst * ne11_dst)) % ne12_dst; + const int i13_dst = index / (ne10_dst * ne11_dst * ne12_dst); + + const int i02_src = (int)(i12_dst / sf2); + const int i03_src = (int)(i13_dst / sf3); + + const float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset; + int y0_src = (int)floorf(y_src_f); + int y1_src = y0_src + 1; + + y0_src = max(0, min(y0_src, ne01_src - 1)); + y1_src = max(0, min(y1_src, ne01_src - 1)); + + float dy = y_src_f - (float)y0_src; + dy = max(0.0f, min(dy, 1.0f)); + + float x_src_f = ((float)i10_dst + pixel_offset) / sf0 - pixel_offset; + int x0_src = (int)floorf(x_src_f); + int x1_src = x0_src + 1; + + x0_src = max(0, min(x0_src, ne00_src - 1)); + x1_src = max(0, min(x1_src, ne00_src - 1)); + + float dx = x_src_f - (float)x0_src; + dx = max(0.0f, min(dx, 1.0f)); + + const float * p_a = (const float *)((const char *)x + (int64_t)x0_src * nb00 + (int64_t)y0_src * nb01 + 
(int64_t)i02_src * nb02 + (int64_t)i03_src * nb03); + const float * p_b = (const float *)((const char *)x + (int64_t)x1_src * nb00 + (int64_t)y0_src * nb01 + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03); + const float * p_c = (const float *)((const char *)x + (int64_t)x0_src * nb00 + (int64_t)y1_src * nb01 + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03); + const float * p_d = (const float *)((const char *)x + (int64_t)x1_src * nb00 + (int64_t)y1_src * nb01 + (int64_t)i02_src * nb02 + (int64_t)i03_src * nb03); + + const float val_a = *p_a; + const float val_b = *p_b; + const float val_c = *p_c; + const float val_d = *p_d; + + float result = val_a * (1.0f - dx) * (1.0f - dy) + + val_b * dx * (1.0f - dy) + + val_c * (1.0f - dx) * dy + + val_d * dx * dy; + + dst[index] = result; +} + static void upscale_f32_cuda(const float * x, float * dst, const int nb00, const int nb01, const int nb02, const int nb03, const int ne10, const int ne11, const int ne12, const int ne13, const float sf0, const float sf1, const float sf2, const float sf3, cudaStream_t stream) { - int dst_size = ne10 * ne11 * ne12 * ne13; - int num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE; + const int64_t dst_size = ne10 * ne11 * ne12 * ne13; + const int64_t num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / CUDA_UPSCALE_BLOCK_SIZE; upscale_f32<<>>(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3); } +static void upscale_f32_bilinear_cuda(const float * x, float * dst, + const int nb00, const int nb01, const int nb02, const int nb03, + const int ne00_src, const int ne01_src, + const int ne10_dst, const int ne11_dst, const int ne12_dst, const int ne13_dst, + const float sf0, const float sf1, const float sf2, const float sf3, + const float pixel_offset, cudaStream_t stream) { + const int64_t dst_size = ne10_dst * ne11_dst * ne12_dst * ne13_dst; + const int64_t num_blocks = (dst_size + CUDA_UPSCALE_BLOCK_SIZE - 1) / 
CUDA_UPSCALE_BLOCK_SIZE; + + upscale_f32_bilinear<<>>(x, dst, nb00, nb01, nb02, nb03, ne00_src, ne01_src, ne10_dst, ne11_dst, ne12_dst, ne13_dst, sf0, sf1, sf2, sf3, pixel_offset); +} + void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { const ggml_tensor * src0 = dst->src[0]; const float * src0_d = (const float *)src0->data; @@ -42,10 +113,25 @@ void ggml_cuda_op_upscale(ggml_backend_cuda_context & ctx, ggml_tensor * dst) { GGML_ASSERT(src0->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); - const float sf0 = (float)dst->ne[0]/src0->ne[0]; - const float sf1 = (float)dst->ne[1]/src0->ne[1]; - const float sf2 = (float)dst->ne[2]/src0->ne[2]; + const int mode_flags = dst->op_params[0]; + const ggml_scale_mode mode = (ggml_scale_mode)(mode_flags & 0xFF); + + float sf0 = (float)dst->ne[0]/src0->ne[0]; + float sf1 = (float)dst->ne[1]/src0->ne[1]; + float sf2 = (float)dst->ne[2]/src0->ne[2]; const float sf3 = (float)dst->ne[3]/src0->ne[3]; - upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream); + if (mode == GGML_SCALE_MODE_NEAREST) { + upscale_f32_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, stream); + } else if (mode == GGML_SCALE_MODE_BILINEAR) { + float pixel_offset = 0.5f; + if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { + sf0 = (float)(dst->ne[0] - 1) / (src0->ne[0] - 1); + sf1 = (float)(dst->ne[1] - 1) / (src0->ne[1] - 1); + pixel_offset = 0.0f; + } + upscale_f32_bilinear_cuda(src0_d, dst_d, src0->nb[0], src0->nb[1], src0->nb[2], src0->nb[3], + src0->ne[0], src0->ne[1], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], + sf0, sf1, sf2, sf3, pixel_offset, stream); + } } diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml/src/ggml-cuda/vendors/hip.h index 1a28831b7a96b..184d445f5c067 100644 --- 
a/ggml/src/ggml-cuda/vendors/hip.h +++ b/ggml/src/ggml-cuda/vendors/hip.h @@ -10,9 +10,6 @@ #include "rocblas/rocblas.h" #endif // __HIP_PLATFORM_AMD__ -#define CUBLAS_COMPUTE_16F HIPBLAS_R_16F -#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F -#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F #define CUBLAS_GEMM_DEFAULT HIPBLAS_GEMM_DEFAULT #define CUBLAS_GEMM_DEFAULT_TENSOR_OP HIPBLAS_GEMM_DEFAULT #define CUBLAS_OP_N HIPBLAS_OP_N @@ -30,7 +27,6 @@ #define CU_CHECK(fn) {hipError_t err = fn; if(err != hipSuccess) { GGML_ABORT("HipVMM Failure: %s\n", hipGetErrorString(err)); }} #define __shfl_sync(mask, var, laneMask, width) __shfl(var, laneMask, width) #define __shfl_xor_sync(mask, var, laneMask, width) __shfl_xor(var, laneMask, width) -#define cublasComputeType_t hipblasDatatype_t //deprecated, new hipblasComputeType_t not in 5.6 #define cublasCreate hipblasCreate #define cublasDestroy hipblasDestroy #define cublasGemmEx hipblasGemmEx @@ -42,7 +38,6 @@ #define cublasSgemm hipblasSgemm #define cublasStatus_t hipblasStatus_t #define cublasOperation_t hipblasOperation_t -#define cudaDataType_t hipblasDatatype_t //deprecated, new hipblasDatatype not in 5.6 #define cudaDeviceCanAccessPeer hipDeviceCanAccessPeer #define cudaDeviceDisablePeerAccess hipDeviceDisablePeerAccess #define cudaDeviceEnablePeerAccess hipDeviceEnablePeerAccess @@ -144,6 +139,20 @@ #define CUBLAS_STATUS_INTERNAL_ERROR HIPBLAS_STATUS_INTERNAL_ERROR #define CUBLAS_STATUS_NOT_SUPPORTED HIPBLAS_STATUS_NOT_SUPPORTED +#if defined(__HIP_PLATFORM_AMD__) && HIP_VERSION >= 70000000 +#define CUBLAS_COMPUTE_16F HIPBLAS_COMPUTE_16F +#define CUBLAS_COMPUTE_32F HIPBLAS_COMPUTE_32F +#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_COMPUTE_32F_FAST_16F +#define cublasComputeType_t hipblasComputeType_t +#define cudaDataType_t hipDataType +#else +#define CUBLAS_COMPUTE_16F HIPBLAS_R_16F +#define CUBLAS_COMPUTE_32F HIPBLAS_R_32F +#define CUBLAS_COMPUTE_32F_FAST_16F HIPBLAS_R_32F +#define cublasComputeType_t hipblasDatatype_t 
+#define cudaDataType_t hipblasDatatype_t +#endif + #define __CUDA_ARCH__ 1300 #if defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) diff --git a/ggml/src/ggml-hip/CMakeLists.txt b/ggml/src/ggml-hip/CMakeLists.txt index 1fe8fe3b8d079..e29df98560e07 100644 --- a/ggml/src/ggml-hip/CMakeLists.txt +++ b/ggml/src/ggml-hip/CMakeLists.txt @@ -113,6 +113,10 @@ if (GGML_HIP_ROCWMMA_FATTN) add_compile_definitions(GGML_HIP_ROCWMMA_FATTN) endif() +if (GGML_HIP_FORCE_ROCWMMA_FATTN_GFX12 OR ${hip_VERSION} VERSION_GREATER_EQUAL 7.0) + add_compile_definitions(GGML_HIP_ROCWMMA_FATTN_GFX12) +endif() + if (NOT GGML_CUDA_FA) add_compile_definitions(GGML_CUDA_NO_FA) endif() diff --git a/ggml/src/ggml-impl.h b/ggml/src/ggml-impl.h index 6dc5ce0d92fd8..4972558c98b81 100644 --- a/ggml/src/ggml-impl.h +++ b/ggml/src/ggml-impl.h @@ -301,6 +301,7 @@ struct ggml_cgraph { struct ggml_tensor ** grads; // the outputs of these tensors are the gradients of the nodes struct ggml_tensor ** grad_accs; // accumulators for node gradients struct ggml_tensor ** leafs; // tensors with constant data + int32_t * use_counts;// number of uses of each tensor, indexed by hash table slot struct ggml_hash_set visited_hash_set; @@ -317,203 +318,81 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph, int i0, int i1); GGML_API void * ggml_aligned_malloc(size_t size); GGML_API void ggml_aligned_free(void * ptr, size_t size); -// FP16 to FP32 conversion +// FP16 <-> FP32 +// ref: https://github.com/Maratyszcza/FP16 -// 16-bit float -// on Arm, we use __fp16 -// on x86, we use uint16_t -// -// for old CUDA compilers (<= 11), we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/10616 -// for MUSA compilers , we use uint16_t: ref https://github.com/ggml-org/llama.cpp/pull/11843 -// -#if defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__) - #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) - #define 
GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) - - #define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) - - static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { - __fp16 tmp; - memcpy(&tmp, &h, sizeof(ggml_fp16_t)); - return (float)tmp; - } - - static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { - ggml_fp16_t res; - __fp16 tmp = f; - memcpy(&res, &tmp, sizeof(ggml_fp16_t)); - return res; - } - -#elif defined(__F16C__) - - #ifdef _MSC_VER - #define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x))) - #define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0) - #else - #define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x) - #define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0) - #endif - -#elif defined(__POWER9_VECTOR__) - - #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) - #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) - /* the inline asm below is about 12% faster than the lookup method */ - #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x) - #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) - - static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { - float f; - double d; - __asm__( - "mtfprd %0,%2\n" - "xscvhpdp %0,%0\n" - "frsp %1,%0\n" : - /* temp */ "=d"(d), - /* out */ "=f"(f): - /* in */ "r"(h)); - return f; - } - - static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { - double d; - ggml_fp16_t r; - __asm__( /* xscvdphp can work on double or single precision */ - "xscvdphp %0,%2\n" - "mffprd %1,%0\n" : - /* temp */ "=d"(d), - /* out */ "=r"(r): - /* in */ "f"(f)); - return r; - } - -#elif defined(__riscv) && defined(__riscv_zfhmin) - - static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { - float f; - __asm__( - "fmv.h.x %[f], %[h]\n\t" - "fcvt.s.h %[f], %[f]" - : [f] "=&f" (f) - : [h] "r" (h) - ); - return f; - } +static inline float fp32_from_bits(uint32_t w) { + union { + uint32_t as_bits; 
+ float as_value; + } fp32; + fp32.as_bits = w; + return fp32.as_value; +} - static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { - ggml_fp16_t res; - __asm__( - "fcvt.h.s %[f], %[f]\n\t" - "fmv.x.h %[h], %[f]" - : [h] "=&r" (res) - : [f] "f" (f) - ); - return res; - } +static inline uint32_t fp32_to_bits(float f) { + union { + float as_value; + uint32_t as_bits; + } fp32; + fp32.as_value = f; + return fp32.as_bits; +} - #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) - #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) - #define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x) - #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) +static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { + const uint32_t w = (uint32_t) h << 16; + const uint32_t sign = w & UINT32_C(0x80000000); + const uint32_t two_w = w + w; + const uint32_t exp_offset = UINT32_C(0xE0) << 23; +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) + const float exp_scale = 0x1.0p-112f; #else + const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); +#endif + const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; - // FP16 <-> FP32 - // ref: https://github.com/Maratyszcza/FP16 - - static inline float fp32_from_bits(uint32_t w) { - union { - uint32_t as_bits; - float as_value; - } fp32; - fp32.as_bits = w; - return fp32.as_value; - } - - static inline uint32_t fp32_to_bits(float f) { - union { - float as_value; - uint32_t as_bits; - } fp32; - fp32.as_value = f; - return fp32.as_bits; - } - - static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) { - const uint32_t w = (uint32_t) h << 16; - const uint32_t sign = w & UINT32_C(0x80000000); - const uint32_t two_w = w + w; - - const uint32_t exp_offset = UINT32_C(0xE0) << 23; - #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || 
defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) - const float exp_scale = 0x1.0p-112f; - #else - const float exp_scale = fp32_from_bits(UINT32_C(0x7800000)); - #endif - const float normalized_value = fp32_from_bits((two_w >> 4) + exp_offset) * exp_scale; - - const uint32_t magic_mask = UINT32_C(126) << 23; - const float magic_bias = 0.5f; - const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; + const uint32_t magic_mask = UINT32_C(126) << 23; + const float magic_bias = 0.5f; + const float denormalized_value = fp32_from_bits((two_w >> 17) | magic_mask) - magic_bias; - const uint32_t denormalized_cutoff = UINT32_C(1) << 27; - const uint32_t result = sign | - (two_w < denormalized_cutoff ? fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); - return fp32_from_bits(result); - } - - static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { - #if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) - const float scale_to_inf = 0x1.0p+112f; - const float scale_to_zero = 0x1.0p-110f; - #else - const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); - const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); - #endif - float base = (fabsf(f) * scale_to_inf) * scale_to_zero; - - const uint32_t w = fp32_to_bits(f); - const uint32_t shl1_w = w + w; - const uint32_t sign = w & UINT32_C(0x80000000); - uint32_t bias = shl1_w & UINT32_C(0xFF000000); - if (bias < UINT32_C(0x71000000)) { - bias = UINT32_C(0x71000000); - } + const uint32_t denormalized_cutoff = UINT32_C(1) << 27; + const uint32_t result = sign | + (two_w < denormalized_cutoff ? 
fp32_to_bits(denormalized_value) : fp32_to_bits(normalized_value)); + return fp32_from_bits(result); +} - base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; - const uint32_t bits = fp32_to_bits(base); - const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); - const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); - const uint32_t nonsign = exp_bits + mantissa_bits; - return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); +static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) { +#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)) && (!defined(__cplusplus) || __cplusplus >= 201703L) + const float scale_to_inf = 0x1.0p+112f; + const float scale_to_zero = 0x1.0p-110f; +#else + const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000)); + const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000)); +#endif + float base = (fabsf(f) * scale_to_inf) * scale_to_zero; + + const uint32_t w = fp32_to_bits(f); + const uint32_t shl1_w = w + w; + const uint32_t sign = w & UINT32_C(0x80000000); + uint32_t bias = shl1_w & UINT32_C(0xFF000000); + if (bias < UINT32_C(0x71000000)) { + bias = UINT32_C(0x71000000); } - #define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) - #define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) - -#endif // defined(__ARM_NEON) && !(defined(__CUDACC__) && __CUDACC_VER_MAJOR__ <= 11) && !defined(__MUSACC__) - -// precomputed f32 table for f16 (256 KB) -// defined in ggml.c, initialized in ggml_init() -GGML_API float ggml_table_f32_f16[1 << 16]; - -// On ARM NEON, it's quicker to directly convert x -> x instead of calling into ggml_lookup_fp16_to_fp32, -// so we define GGML_FP16_TO_FP32 and GGML_FP32_TO_FP16 elsewhere for NEON. -// This is also true for POWER9. 
-#if !defined(GGML_FP16_TO_FP32) -inline static float ggml_lookup_fp16_to_fp32(ggml_fp16_t f) { - uint16_t s; - memcpy(&s, &f, sizeof(uint16_t)); - return ggml_table_f32_f16[s]; + base = fp32_from_bits((bias >> 1) + UINT32_C(0x07800000)) + base; + const uint32_t bits = fp32_to_bits(base); + const uint32_t exp_bits = (bits >> 13) & UINT32_C(0x00007C00); + const uint32_t mantissa_bits = bits & UINT32_C(0x00000FFF); + const uint32_t nonsign = exp_bits + mantissa_bits; + return (sign >> 16) | (shl1_w > UINT32_C(0xFF000000) ? UINT16_C(0x7E00) : nonsign); } -#define GGML_FP16_TO_FP32(x) ggml_lookup_fp16_to_fp32(x) -#endif +#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x) +#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x) -#if !defined(GGML_FP32_TO_FP16) +#define GGML_FP16_TO_FP32(x) GGML_COMPUTE_FP16_TO_FP32(x) #define GGML_FP32_TO_FP16(x) GGML_COMPUTE_FP32_TO_FP16(x) -#endif /** * Converts brain16 to float32. @@ -589,13 +468,76 @@ static inline ggml_bf16_t ggml_compute_fp32_to_bf16(float s) { #define GGML_FP32_TO_BF16(x) ggml_compute_fp32_to_bf16(x) #define GGML_BF16_TO_FP32(x) ggml_compute_bf16_to_fp32(x) +// return true if the node's results are only used by N other nodes +// and can be fused into their calculations. +static inline bool ggml_node_has_n_uses(const struct ggml_cgraph * cgraph, int node_idx, int32_t n_uses) { + const struct ggml_tensor * node = cgraph->nodes[node_idx]; + + // check the use count against how many we're replacing + size_t hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node); + if (!ggml_bitset_get(cgraph->visited_hash_set.used, hash_pos) || cgraph->use_counts[hash_pos] != n_uses) { + return false; + } + + // if node is a view, some other node might be using the intermediate result + // via the view source. 
+ if (node->view_src) { + return false; + } + + // If the user requested output for the node, can't fuse + if (node->flags & GGML_TENSOR_FLAG_OUTPUT) { + return false; + } + + return true; +} + +// Returns true if nodes [i, i+ops.size()) are the sequence of ggml_ops in ops[] +// and are fusable. Nodes are considered fusable according to this function if: +// - all nodes except the last have only one use and are not views/outputs (see ggml_node_has_N_uses). +// - all nodes except the last are a src of the following node. +// - all nodes are the same shape. +// TODO: Consider allowing GGML_OP_NONE nodes in between +static inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, const enum ggml_op * ops, int num_ops) { + if (node_idx + num_ops > cgraph->n_nodes) { + return false; + } + + for (int i = 0; i < num_ops; ++i) { + struct ggml_tensor * node = cgraph->nodes[node_idx + i]; + if (node->op != ops[i]) { + return false; + } + if (i < num_ops - 1 && !ggml_node_has_n_uses(cgraph, node_idx + i, 1)) { + return false; + } + if (i > 0) { + struct ggml_tensor * prev = cgraph->nodes[node_idx + i - 1]; + if (node->src[0] != prev && node->src[1] != prev) { + return false; + } + if (!ggml_are_same_shape(node, prev)) { + return false; + } + } + } + return true; +} + #ifdef __cplusplus } #endif #ifdef __cplusplus +#include #include +// nicer C++ syntax for ggml_can_fuse +inline bool ggml_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list ops) { + return ggml_can_fuse(cgraph, node_idx, ops.begin(), (int)ops.size()); +} + // expose GGUF internals for test code GGML_API size_t gguf_type_size(enum gguf_type type); GGML_API struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_params params); diff --git a/ggml/src/ggml-kompute/CMakeLists.txt b/ggml/src/ggml-kompute/CMakeLists.txt deleted file mode 100644 index c9109d5e8ee19..0000000000000 --- a/ggml/src/ggml-kompute/CMakeLists.txt +++ /dev/null @@ -1,166 +0,0 @@ 
- -find_package(Vulkan COMPONENTS glslc REQUIRED) -find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc) - -if (NOT glslc_executable) - message(FATAL_ERROR "glslc not found") -endif() - -ggml_add_backend_library(ggml-kompute - ggml-kompute.cpp - ../../include/ggml-kompute.h - ) - -target_link_libraries(ggml-kompute PRIVATE ggml-base kompute) -target_include_directories(ggml-kompute PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) - -add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) - -function(compile_shader) - set(options) - set(oneValueArgs) - set(multiValueArgs SOURCES) - cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - foreach(source ${compile_shader_SOURCES}) - get_filename_component(filename ${source} NAME) - set(spv_file ${filename}.spv) - add_custom_command( - OUTPUT ${spv_file} - DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source} - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp - ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp - COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} - COMMENT "Compiling ${source} to ${spv_file}" - ) - - get_filename_component(RAW_FILE_NAME ${spv_file} NAME) - set(FILE_NAME "shader${RAW_FILE_NAME}") - string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME}) - string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE) - string(REPLACE "." 
"_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}") - set(OUTPUT_HEADER_FILE "${HEADER_FILE}") - message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}") - if(CMAKE_GENERATOR MATCHES "Visual Studio") - add_custom_command( - OUTPUT ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_BINARY_DIR}/bin/$/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - DEPENDS ${spv_file} xxd - COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$/xxd" - ) - else() - add_custom_command( - OUTPUT ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} - COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} - DEPENDS ${spv_file} xxd - COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd" - ) - endif() - 
endforeach() -endfunction() - -if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt") - message(STATUS "Kompute found") - set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level") - add_subdirectory(kompute) - - # Compile our shaders - compile_shader(SOURCES - kompute-shaders/op_scale.comp - kompute-shaders/op_scale_8.comp - kompute-shaders/op_add.comp - kompute-shaders/op_addrow.comp - kompute-shaders/op_mul.comp - kompute-shaders/op_silu.comp - kompute-shaders/op_relu.comp - kompute-shaders/op_gelu.comp - kompute-shaders/op_softmax.comp - kompute-shaders/op_norm.comp - kompute-shaders/op_rmsnorm.comp - kompute-shaders/op_diagmask.comp - kompute-shaders/op_mul_mat_mat_f32.comp - kompute-shaders/op_mul_mat_f16.comp - kompute-shaders/op_mul_mat_q8_0.comp - kompute-shaders/op_mul_mat_q4_0.comp - kompute-shaders/op_mul_mat_q4_1.comp - kompute-shaders/op_mul_mat_q4_k.comp - kompute-shaders/op_mul_mat_q6_k.comp - kompute-shaders/op_getrows_f32.comp - kompute-shaders/op_getrows_f16.comp - kompute-shaders/op_getrows_q4_0.comp - kompute-shaders/op_getrows_q4_1.comp - kompute-shaders/op_getrows_q6_k.comp - kompute-shaders/op_rope_norm_f16.comp - kompute-shaders/op_rope_norm_f32.comp - kompute-shaders/op_rope_neox_f16.comp - kompute-shaders/op_rope_neox_f32.comp - kompute-shaders/op_cpy_f16_f16.comp - kompute-shaders/op_cpy_f16_f32.comp - kompute-shaders/op_cpy_f32_f16.comp - kompute-shaders/op_cpy_f32_f32.comp - ) - - # Create a custom target for our generated shaders - add_custom_target(generated_shaders DEPENDS - shaderop_scale.h - shaderop_scale_8.h - shaderop_add.h - shaderop_addrow.h - shaderop_mul.h - shaderop_silu.h - shaderop_relu.h - shaderop_gelu.h - shaderop_softmax.h - shaderop_norm.h - shaderop_rmsnorm.h - shaderop_diagmask.h - shaderop_mul_mat_mat_f32.h - shaderop_mul_mat_f16.h - shaderop_mul_mat_q8_0.h - shaderop_mul_mat_q4_0.h - shaderop_mul_mat_q4_1.h - shaderop_mul_mat_q4_k.h - shaderop_mul_mat_q6_k.h - shaderop_getrows_f32.h - 
shaderop_getrows_f16.h - shaderop_getrows_q4_0.h - shaderop_getrows_q4_1.h - shaderop_getrows_q6_k.h - shaderop_rope_norm_f16.h - shaderop_rope_norm_f32.h - shaderop_rope_neox_f16.h - shaderop_rope_neox_f32.h - shaderop_cpy_f16_f16.h - shaderop_cpy_f16_f32.h - shaderop_cpy_f32_f16.h - shaderop_cpy_f32_f32.h - ) - - # Create a custom command that depends on the generated_shaders - add_custom_command( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp - DEPENDS generated_shaders - COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" - ) - - # Add the stamp to the main sources to ensure dependency tracking - target_sources(ggml-kompute PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) -else() - message(WARNING "Kompute not found") -endif() diff --git a/ggml/src/ggml-kompute/ggml-kompute.cpp b/ggml/src/ggml-kompute/ggml-kompute.cpp deleted file mode 100644 index 50579227183d3..0000000000000 --- a/ggml/src/ggml-kompute/ggml-kompute.cpp +++ /dev/null @@ -1,2251 +0,0 @@ -#include "ggml-impl.h" -#include "ggml-backend.h" -#include "ggml-backend-impl.h" -#include "ggml-kompute.h" - -// These are generated at build time by cmake custom command -#include "shaderop_scale.h" -#include "shaderop_scale_8.h" -#include "shaderop_add.h" -#include "shaderop_addrow.h" -#include "shaderop_mul.h" -#include "shaderop_silu.h" -#include "shaderop_relu.h" -#include "shaderop_gelu.h" -#include "shaderop_softmax.h" -#include "shaderop_norm.h" -#include "shaderop_rmsnorm.h" -#include "shaderop_diagmask.h" -#include "shaderop_mul_mat_f16.h" -#include "shaderop_mul_mat_q8_0.h" -#include "shaderop_mul_mat_q4_0.h" -#include "shaderop_mul_mat_q4_1.h" -#include "shaderop_mul_mat_q4_k.h" -#include "shaderop_mul_mat_q6_k.h" -#include "shaderop_mul_mat_mat_f32.h" -#include "shaderop_getrows_f32.h" -#include "shaderop_getrows_f16.h" -#include "shaderop_getrows_q4_0.h" -#include 
"shaderop_getrows_q4_1.h" -#include "shaderop_getrows_q6_k.h" -#include "shaderop_rope_norm_f16.h" -#include "shaderop_rope_norm_f32.h" -#include "shaderop_rope_neox_f16.h" -#include "shaderop_rope_neox_f32.h" -#include "shaderop_cpy_f16_f16.h" -#include "shaderop_cpy_f16_f32.h" -#include "shaderop_cpy_f32_f16.h" -#include "shaderop_cpy_f32_f32.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#ifdef __linux__ -#include // for setenv -#endif - -#define QK4_0 32 -#define QR4_0 2 -#define QK4_1 32 -#define QK_NL 16 - -typedef ggml_fp16_t half; - -static std::string ggml_kompute_format_name(int device) { - return "Kompute" + std::to_string(device); -} - -struct ggml_kompute_context { - int device; - std::string name; - std::shared_ptr pool; - - ggml_kompute_context(int device) - : device(device), name(ggml_kompute_format_name(device)) {} -}; - -// FIXME: It would be good to consolidate the kompute manager and the kompute context into one object -// and consolidate the init functions and simplify object lifetime management. As it currently stands, -// we *have* to have the kompute manager no matter what for device discovery, but the kompute context -// is only created when a device is set and vulkan is explicitly turned on. 
-static ggml_kompute_context *s_kompute_context = nullptr; - -class kompute_manager { - kp::Manager *s_mgr = nullptr; - -public: - kp::Manager *operator()() { - if (s_mgr && !s_mgr->hasInstance()) { - destroy(); - } - if (!s_mgr) { - s_mgr = new kp::Manager; - } - return s_mgr; - } - - void destroy() { - delete s_mgr; - s_mgr = nullptr; - } -}; - -static kompute_manager komputeManager; - -struct ggml_vk_memory { - void *data = nullptr; - size_t size = 0; - vk::DeviceMemory *primaryMemory = nullptr; - vk::Buffer *primaryBuffer = nullptr; - vk::DeviceMemory *stagingMemory = nullptr; - vk::Buffer *stagingBuffer = nullptr; -}; - -#ifdef __linux__ -__attribute__((constructor)) -static void enable_sam() { - setenv("RADV_PERFTEST", "sam", false); -} -#endif - -static bool ggml_vk_checkPhysicalDeviceFeatures(vk::PhysicalDevice physical_device) { - vk::PhysicalDeviceFeatures availableFeatures; - physical_device.getFeatures(&availableFeatures); - - if (!availableFeatures.shaderInt16) - return false; - - vk::PhysicalDeviceVulkan11Features availableFeatures11; - vk::PhysicalDeviceVulkan12Features availableFeatures12; - - availableFeatures11.pNext = &availableFeatures12; - availableFeatures12.pNext = nullptr; - - vk::PhysicalDeviceFeatures2 features2; - features2.pNext = &availableFeatures11; - - physical_device.getFeatures2(&features2); - - if (!availableFeatures11.uniformAndStorageBuffer16BitAccess || - !availableFeatures11.storageBuffer16BitAccess) { - return false; - } - - if (!availableFeatures12.storageBuffer8BitAccess || - !availableFeatures12.uniformAndStorageBuffer8BitAccess || - !availableFeatures12.shaderFloat16 || - !availableFeatures12.shaderInt8) { - return false; - } - - return true; -} - -static const char * ggml_vk_getVendorName(uint32_t vendorID) { - switch (vendorID) { - case 0x10DE: - return "nvidia"; - case 0x1002: - return "amd"; - case 0x8086: - return "intel"; - default: - return "unknown"; - } -} - -static std::vector 
ggml_vk_available_devices_internal(size_t memoryRequired) { - std::vector results; - if (!komputeManager()->hasVulkan() || !komputeManager()->hasInstance()) - return results; - - std::vector physical_devices; - try { - physical_devices = komputeManager()->listDevices(); - } catch (vk::SystemError & err) { - std::cerr << __func__ << ": ignoring Vulkan exception: " << err.what() << "\n"; - return results; - } - - uint32_t deviceCount = physical_devices.size(); - if (deviceCount == 0) - return results; - - std::unordered_map count_by_name; - - for (uint32_t i = 0; i < deviceCount; i++) { - const auto & physical_device = physical_devices[i]; - - VkPhysicalDeviceProperties dev_props = physical_device.getProperties(); - VkPhysicalDeviceMemoryProperties memoryProperties = physical_device.getMemoryProperties(); - const uint32_t major = VK_VERSION_MAJOR(dev_props.apiVersion); - const uint32_t minor = VK_VERSION_MINOR(dev_props.apiVersion); - if (major < 1 || minor < 2) - continue; - - if (!ggml_vk_checkPhysicalDeviceFeatures(physical_device)) - continue; - - size_t heapSize = 0; - for (uint32_t j = 0; j < memoryProperties.memoryHeapCount; ++j) { - VkMemoryHeap heap = memoryProperties.memoryHeaps[j]; - if (heap.flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) { - heapSize = heap.size; - break; - } - } - - if (heapSize < memoryRequired) - continue; - - auto ext_props = physical_device.enumerateDeviceExtensionProperties(); - bool has_maintenance4 = false; - - // Check if maintenance4 is supported - for (const auto & properties : ext_props) { - if (strcmp("VK_KHR_maintenance4", properties.extensionName) == 0) { - has_maintenance4 = true; - } - } - - vk::PhysicalDeviceSubgroupProperties subgroup_props; - vk::PhysicalDeviceProperties2 dev_props2; - vk::PhysicalDeviceMaintenance3Properties dev_props3; - vk::PhysicalDeviceMaintenance4Properties dev_props4; - dev_props2.pNext = &dev_props3; - dev_props3.pNext = &subgroup_props; - if (has_maintenance4) { - subgroup_props.pNext = &dev_props4; 
- } - physical_device.getProperties2(&dev_props2); - - if (subgroup_props.subgroupSize < 32) - continue; - - ggml_vk_device d; - d.index = i; - d.type = dev_props.deviceType; - d.heapSize = heapSize; - d.vendor = strdup(ggml_vk_getVendorName(dev_props.vendorID)); - d.subgroupSize = subgroup_props.subgroupSize; - d.bufferAlignment = dev_props.limits.minStorageBufferOffsetAlignment; - - if (has_maintenance4) { - d.maxAlloc = std::min(dev_props3.maxMemoryAllocationSize, dev_props4.maxBufferSize); - } else { - d.maxAlloc = dev_props3.maxMemoryAllocationSize; - } - - std::string name(dev_props.deviceName); - size_t n_idx = ++count_by_name[name]; - if (n_idx > 1) { - name += " (" + std::to_string(n_idx) + ")"; - } - d.name = strdup(name.c_str()); - - results.push_back(d); - } - - std::stable_sort(results.begin(), results.end(), - [](const ggml_vk_device& lhs, const ggml_vk_device& rhs) -> bool { - if (lhs.type != rhs.type) { - if (lhs.type == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) return true; - if (rhs.type == VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU) return false; - - if (lhs.type == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) return true; - if (rhs.type == VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU) return false; - } - return lhs.heapSize < rhs.heapSize; - } - ); - - return results; -} - -static std::vector& ggml_vk_available_devices() { - static std::vector devices = ggml_vk_available_devices_internal(0); - return devices; -} - -static void ggml_vk_filterByVendor(std::vector& devices, const std::string& targetVendor) { - devices.erase( - std::remove_if(devices.begin(), devices.end(), - [&targetVendor](const ggml_vk_device& device) { - return device.vendor != targetVendor; - }), - devices.end() - ); -} - -static void ggml_vk_filterByName(std::vector& devices, const std::string& targetName) { - devices.erase( - std::remove_if(devices.begin(), devices.end(), - [&targetName](const ggml_vk_device& device) { - return device.name != targetName; - }), - devices.end() - ); -} - -static 
bool ggml_vk_get_device(ggml_vk_device * device, size_t memoryRequired, const std::string & name) { - if (name.empty()) - return false; - - auto devices = ggml_vk_available_devices_internal(memoryRequired); - if (name == "amd" || name == "nvidia" || name == "intel") { - ggml_vk_filterByVendor(devices, name); - } else if (name != "gpu") { - ggml_vk_filterByName(devices, name); - } - - if (devices.empty()) - return false; - - *device = devices.front(); - return true; -} - -bool ggml_vk_get_device(ggml_vk_device * device, size_t memoryRequired, const char * name) { - return ggml_vk_get_device(device, memoryRequired, std::string(name)); -} - -bool ggml_vk_has_vulkan() { - return komputeManager()->hasVulkan(); -} - -bool ggml_vk_has_device() { - return komputeManager()->hasDevice(); -} - -ggml_vk_device ggml_vk_current_device() { - if (!komputeManager()->hasDevice()) - return ggml_vk_device(); - - auto devices = ggml_vk_available_devices(); - ggml_vk_filterByName(devices, komputeManager()->physicalDevice()->getProperties().deviceName.data()); - GGML_ASSERT(!devices.empty()); - return devices.front(); -} - -static -void ggml_vk_allocate_descriptor_pool(struct ggml_kompute_context * ctx, size_t size) { - std::vector descriptorPoolSizes = { - vk::DescriptorPoolSize( - vk::DescriptorType::eStorageBuffer, - 4 * size // Descriptor count is number of possible tensors to pass into an algorithm - ) - }; - - vk::DescriptorPoolCreateInfo descriptorPoolInfo( - vk::DescriptorPoolCreateFlags(), - size, // Max sets - static_cast(descriptorPoolSizes.size()), - descriptorPoolSizes.data()); - - ctx->pool = std::make_shared(); - vk::Result r = komputeManager()->device()->createDescriptorPool( - &descriptorPoolInfo, nullptr, ctx->pool.get()); - if (r != vk::Result::eSuccess) - std::cerr << "Error allocating descriptor pool" << vk::to_string(r); -} - -static -void ggml_vk_free_descriptor_pool(struct ggml_kompute_context * ctx) { - if (ctx->pool) { - komputeManager()->device()->destroy( - 
*ctx->pool, - (vk::Optional)nullptr); - ctx->pool = nullptr; - } -} - -static -vk::Buffer *ggml_vk_allocate_buffer(size_t size) { - vk::BufferCreateInfo bufferCreateInfo; - bufferCreateInfo.size = size; - bufferCreateInfo.usage = vk::BufferUsageFlagBits::eStorageBuffer | - vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eTransferDst; - bufferCreateInfo.sharingMode = vk::SharingMode::eExclusive; - - vk::Buffer *vkBuffer = new vk::Buffer; - vk::Result r = komputeManager()->device()->createBuffer(&bufferCreateInfo, nullptr, vkBuffer); - if (r != vk::Result::eSuccess) - std::cerr << "Error allocating buffer " << vk::to_string(r) << std::endl; - return vkBuffer; -} - -static -vk::DeviceMemory *ggml_vk_allocate(size_t size, vk::MemoryPropertyFlags flags, vk::MemoryRequirements requirements, bool *isHostVisible) { - - uint32_t memoryTypeIndex = -1; - bool memoryTypeIndexFound = false; - vk::PhysicalDeviceMemoryProperties memoryProperties = komputeManager()->physicalDevice()->getMemoryProperties(); - for (uint32_t i = 0; i < memoryProperties.memoryTypeCount; i++) { - const vk::MemoryType &memoryType = memoryProperties.memoryTypes[i]; - const vk::MemoryHeap &memoryHeap = memoryProperties.memoryHeaps[memoryType.heapIndex]; - if (memoryHeap.size < size) { - continue; - } - - if (requirements.memoryTypeBits & (1 << i)) { - if (((memoryProperties.memoryTypes[i]).propertyFlags & - flags) == flags) { - memoryTypeIndex = i; - memoryTypeIndexFound = true; - if (isHostVisible && (memoryProperties.memoryTypes[i].propertyFlags & vk::MemoryPropertyFlagBits::eHostVisible)) { - *isHostVisible = true; - } - break; - } - } - } - if (!memoryTypeIndexFound) { - throw std::runtime_error( - "Memory type index for buffer creation not found"); - } - - vk::MemoryAllocateInfo allocInfo; - allocInfo.allocationSize = size; - allocInfo.memoryTypeIndex = memoryTypeIndex; - vk::DeviceMemory *vkDeviceMemory = new vk::DeviceMemory; - vk::Result r = 
komputeManager()->device()->allocateMemory(&allocInfo, nullptr, vkDeviceMemory); - if (r != vk::Result::eSuccess) { - std::cerr << "Error allocating memory " << vk::to_string(r) << std::endl; - throw std::runtime_error("Error allocating vulkan memory."); - } - return vkDeviceMemory; -} - -static size_t ggml_vk_aligned_offset(ggml_backend_buffer_t buffer, size_t offset) { - size_t minStorageBufferOffsetAlignment = ggml_backend_buffer_get_alignment(buffer); - - // If offset is already aligned, return it directly - if (offset % minStorageBufferOffsetAlignment == 0) { - return offset; - } - - // Otherwise, return the largest multiple of minStorageBufferOffsetAlignment less than offset - return (offset / minStorageBufferOffsetAlignment) * minStorageBufferOffsetAlignment; -} - -static ggml_vk_memory ggml_vk_allocate(size_t size) { - ggml_vk_memory memory; - bool isHostVisible = false; - { - memory.primaryBuffer = ggml_vk_allocate_buffer(size); - vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.primaryBuffer); - vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eDeviceLocal; - memory.primaryMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible); - komputeManager()->device()->bindBufferMemory(*memory.primaryBuffer, *memory.primaryMemory, 0); - if (isHostVisible) { - vk::Result r = komputeManager()->device()->mapMemory(*memory.primaryMemory, 0, size, vk::MemoryMapFlags(), &memory.data); - if (r != vk::Result::eSuccess) - std::cerr << "Error mapping memory" << vk::to_string(r); - } - } - - if (!isHostVisible) { - memory.stagingBuffer = ggml_vk_allocate_buffer(size); - vk::MemoryRequirements memoryRequirements = komputeManager()->device()->getBufferMemoryRequirements(*memory.stagingBuffer); - vk::MemoryPropertyFlags memoryPropertyFlags = vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent | - 
vk::MemoryPropertyFlagBits::eHostCached; - memory.stagingMemory = ggml_vk_allocate(size, memoryPropertyFlags, memoryRequirements, &isHostVisible); - komputeManager()->device()->bindBufferMemory(*memory.stagingBuffer, *memory.stagingMemory, 0); - vk::Result r = komputeManager()->device()->mapMemory(*memory.stagingMemory, 0, size, vk::MemoryMapFlags(), &memory.data); - if (r != vk::Result::eSuccess) - std::cerr << "Error mapping memory" << vk::to_string(r); - } - - memory.size = size; - return memory; -} - -static void ggml_vk_free_memory(ggml_vk_memory &memory) -{ - komputeManager()->device()->destroy( - *memory.primaryBuffer, - (vk::Optional)nullptr); - if (memory.stagingBuffer) { - komputeManager()->device()->destroy( - *memory.stagingBuffer, - (vk::Optional)nullptr); - } - komputeManager()->device()->freeMemory( - *memory.primaryMemory, - (vk::Optional)nullptr); - if (memory.stagingMemory) { - komputeManager()->device()->freeMemory( - *memory.stagingMemory, - (vk::Optional)nullptr); - } -} - -static const char * ggml_backend_kompute_buffer_type_get_name(ggml_backend_buffer_type_t buft); - -static -ggml_vk_memory * ggml_vk_find_tensor(const struct ggml_tensor * t, uint64_t & offset) { - ggml_backend_buffer_t buffer = t->view_src ? 
t->view_src->buffer : t->buffer; - - // compatibility with ggml-backend - GGML_ASSERT(buffer && buffer->buft->iface.get_name == ggml_backend_kompute_buffer_type_get_name); - - ggml_vk_memory * buf_ctx = static_cast(buffer->context); - - const intptr_t ioffs = intptr_t(t->data) - intptr_t(buf_ctx->data); - - GGML_ASSERT(ioffs >= 0 && ioffs + int64_t(ggml_nbytes(t)) <= int64_t(buffer->size)); - - offset = uint64_t(ioffs); - return buf_ctx; -} - -static -const std::shared_ptr ggml_vk_get_tensor(const struct ggml_tensor * t, uint32_t * alignedOffset = nullptr) { - uint64_t originalOffset = 0; - auto * res = ggml_vk_find_tensor(t, originalOffset); - if (!res) { - static std::shared_ptr nullTensor = nullptr; - return nullTensor; - } - - // Create a tensor whose memory will be composed of our buffers at the correct offset - const size_t nelements = ggml_nelements(t); - size_t nbytes = ggml_nbytes(t); - - size_t vulkanOffset = ggml_vk_aligned_offset(t->buffer, originalOffset); - if (alignedOffset) { - *alignedOffset = originalOffset - vulkanOffset; - nbytes += *alignedOffset; - } - - return komputeManager()->tensor( - t->data, - nelements, - nbytes, kp::Tensor::TensorDataTypes::eFloat, - res->primaryMemory, res->primaryBuffer, - res->stagingMemory, res->stagingBuffer, - vulkanOffset); -} - -static std::vector getSpirvShader(const unsigned char* rawData, size_t size) { - if (size % sizeof(uint32_t) != 0) { - throw std::runtime_error("Invalid size: must be divisible by sizeof(uint32_t)"); - } - - const uint32_t* data_ptr = reinterpret_cast(rawData); - size_t count = size / sizeof(uint32_t); - return std::vector(data_ptr, data_ptr + count); -} - -inline static -uint32_t safe_divide(uint32_t a, uint32_t b) { - if (b <= 1) { - return a; - } - if ((a % b) != 0) { - fprintf(stderr, "((%u %% %u) == %u) != 0\n", a, b, a % b); - GGML_ABORT("safe_divide result would've had remainder"); - } - return a / b; -} - -static void ggml_vk_add( - kp::Sequence& seq, - const std::shared_ptr& 
inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, int32_t ne03, - int32_t nb00, int32_t nb01, int32_t nb02, int32_t nb03, - int32_t ne10, int32_t ne11, int32_t ne12, int32_t ne13, - int32_t nb10, int32_t nb11, int32_t nb12, int32_t nb13, - int32_t ne0, - int32_t nb0, int32_t nb1, int32_t nb2, int32_t nb3 -) { - const static auto spirv = getSpirvShader(kp::shader_data::op_add_comp_spv, - kp::shader_data::op_add_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00; - int32_t nb00, nb01, nb02, nb03; - int32_t ne10, ne11, ne12, ne13; - int32_t nb10, nb11, nb12, nb13; - int32_t ne0; - int32_t nb0, nb1, nb2, nb3; - } const pushConsts { - safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, - nb00, nb01, nb02, nb03, - ne10, ne11, ne12, ne13, - nb10, nb11, nb12, nb13, - ne0, - nb0, nb1, nb2, nb3 - }; - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) { - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_addrow(kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - uint32_t size, uint32_t row = 0) { - - const static auto spirv = getSpirvShader(kp::shader_data::op_addrow_comp_spv, - kp::shader_data::op_addrow_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - uint32_t row; - } const 
pushConsts { - safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4), - row - }; - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); - else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({size}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_mul( - kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, int32_t ne03, - int32_t nb00, int32_t nb01, int32_t nb02, int32_t nb03, - int32_t ne10, int32_t ne11, int32_t ne12, int32_t ne13, - int32_t nb10, int32_t nb11, int32_t nb12, int32_t nb13, - int32_t ne0, - int32_t nb0, int32_t nb1, int32_t nb2, int32_t nb3 -) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_comp_spv, - kp::shader_data::op_mul_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00; - int32_t nb00, nb01, nb02, nb03; - int32_t ne10, ne11, ne12, ne13; - int32_t nb10, nb11, nb12, nb13; - int32_t ne0; - int32_t nb0, nb1, nb2, nb3; - } const pushConsts { - safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, - nb00, nb01, nb02, nb03, - ne10, ne11, ne12, ne13, - nb10, nb11, nb12, nb13, - ne0, - nb0, nb1, nb2, nb3 - }; - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) { - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB, 
out}); - s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_scale(kp::Sequence& seq, - const std::shared_ptr& in, - const std::shared_ptr& out, - uint32_t inOff, uint32_t outOff, - uint32_t size, float scale) { - const static auto spirv_1 = getSpirvShader( - kp::shader_data::op_scale_comp_spv, kp::shader_data::op_scale_comp_spv_len - ); - const static auto spirv_8 = getSpirvShader( - kp::shader_data::op_scale_8_comp_spv, kp::shader_data::op_scale_8_comp_spv_len - ); - - struct PushConstants { - uint32_t inOff, outOff; - float scale; - } const pushConsts { - safe_divide(inOff, 4), safe_divide(outOff, 4), - scale - }; - - const auto * spirv = &spirv_1; - std::string name(__func__); - if (size % 8 == 0) { - size /= 8; - name += "_8"; - spirv = &spirv_8; - } - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(name)) { - s_algo = komputeManager()->algorithm(name, s_kompute_context->pool.get(), {in, out}, *spirv, {size}, {}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(name); - s_algo->setTensors({in, out}); - s_algo->setWorkgroup({size}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_xxlu( - const std::vector& spirv, const char * suffix, kp::Sequence& seq, - const std::shared_ptr& in, - const std::shared_ptr& out, - uint32_t inOff, uint32_t outOff, - uint32_t size -) { - struct PushConstants { - uint32_t inOff, outOff; - } const pushConsts { - safe_divide(inOff, 4), safe_divide(outOff, 4), - }; - - auto name = std::string(__func__) + "_" + suffix; - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(name)) { - s_algo = komputeManager()->algorithm(name, s_kompute_context->pool.get(), {in, out}, spirv, {size}, {}, 
{pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(name); - s_algo->setTensors({in, out}); - s_algo->setWorkgroup({size}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -template -static void ggml_vk_silu(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_silu_comp_spv, - kp::shader_data::op_silu_comp_spv_len); - - ggml_vk_xxlu(spirv, "silu", std::forward(args)...); -} - -template -static void ggml_vk_relu(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_relu_comp_spv, - kp::shader_data::op_relu_comp_spv_len); - - ggml_vk_xxlu(spirv, "relu", std::forward(args)...); -} - -template -static void ggml_vk_gelu(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_gelu_comp_spv, - kp::shader_data::op_gelu_comp_spv_len); - - ggml_vk_xxlu(spirv, "gelu", std::forward(args)...); -} - -static void ggml_vk_soft_max( - kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, uint32_t ne03, - float scale, float max_bias, float m0, float m1, - uint32_t n_head_log2 -) { - const static auto spirv = getSpirvShader(kp::shader_data::op_softmax_comp_spv, - kp::shader_data::op_softmax_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00, ne01, ne02; - float scale, max_bias, m0, m1; - uint32_t n_head_log2; - int32_t mask; - } pushConsts { - safe_divide(inAOff, 4), safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, ne01, ne02, - scale, max_bias, m0, m1, - n_head_log2, - bool(inB) - }; - - auto & inB_ = inB ? 
inB : inA; - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) { - // FIXME: The softmax kernel needs to be fixed to use the subgroupsize which can vary by device - const uint32_t local_x = 32; - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {inA, inB_, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {local_x}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB_, out}); - s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_norm_( - const std::vector& spirv, const char * suffix, kp::Sequence& seq, - const std::shared_ptr& in, - const std::shared_ptr& out, - uint32_t inOff, uint32_t outOff, - int32_t ne00, int32_t nb01, - int32_t nrows, float epsilon -) { - GGML_ASSERT(nb01%sizeof(float) == 0); - GGML_ASSERT(ne00%sizeof(float) == 0); - - struct PushConstants { - uint32_t inOff, outOff; - uint32_t ne00, nb01; - float eps; - } pushConsts { - safe_divide(inOff, 4), safe_divide(outOff, 4), - (uint32_t)ne00, (uint32_t)nb01, epsilon - }; - - auto name = std::string(__func__) + "_" + suffix; - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(name)) { - s_algo = komputeManager()->algorithm(name, s_kompute_context->pool.get(), {in, out}, spirv, {(uint32_t)nrows}, {}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(name); - s_algo->setTensors({in, out}); - s_algo->setWorkgroup({(uint32_t)nrows}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -template -static void ggml_vk_norm(Args&&... 
args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_norm_comp_spv, - kp::shader_data::op_norm_comp_spv_len); - - ggml_vk_norm_(spirv, "norm", std::forward(args)...); -} - -template -static void ggml_vk_rms_norm(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_rmsnorm_comp_spv, - kp::shader_data::op_rmsnorm_comp_spv_len); - - ggml_vk_norm_(spirv, "rms", std::forward(args)...); -} - -static void ggml_vk_diag_mask_inf(kp::Sequence& seq, - const std::shared_ptr& in, - const std::shared_ptr& out, - uint32_t inOff, uint32_t outOff, - uint32_t n_past, - int32_t ne00, int32_t ne01, int32_t ne02) { - const static auto spirv = getSpirvShader(kp::shader_data::op_diagmask_comp_spv, - kp::shader_data::op_diagmask_comp_spv_len); - - struct PushConstants { - uint32_t inOff, outOff; - uint32_t n_past; - int32_t ne00, ne01; - } pushConsts { - safe_divide(inOff, 4), safe_divide(outOff, 4), - n_past, - ne00, ne01 - }; - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {in, out}, spirv, {unsigned(ne00), unsigned(ne01), unsigned(ne02)}, {}, {pushConsts}); - else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({in, out}); - s_algo->setWorkgroup({unsigned(ne00), unsigned(ne01), unsigned(ne02)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_mul_mat_f16( - kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, - uint32_t nb00, uint32_t nb01, uint32_t nb02, uint32_t nb03, - int32_t ne10, int32_t ne11, int32_t ne12, int32_t ne13, - uint32_t nb10, uint32_t nb11, uint32_t nb12, uint32_t nb13, - int32_t ne0, int32_t ne1, - uint32_t r2, uint32_t r3 
-) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_mat_f16_comp_spv, - kp::shader_data::op_mul_mat_f16_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00, ne01, ne02; - uint32_t nb00, nb01, nb02, nb03; - int32_t ne10, ne11, ne12; - uint32_t nb10, nb11, nb12, nb13; - int32_t ne0, ne1; - uint32_t r2, r3; - } pushConsts { - safe_divide(inAOff, 2), safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, ne01, ne02, - nb00, nb01, nb02, nb03, - ne10, ne11, ne12, - nb10, nb11, nb12, nb13, - ne0, ne1, - r2, r3 - }; - - const unsigned ny = unsigned((ne11 + 4 - 1)/4); - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) { - const uint32_t local_x = ggml_vk_current_device().subgroupSize * 2; - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {unsigned(ne01), ny, unsigned(ne12*ne13)}, {local_x}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({unsigned(ne01), ny, unsigned(ne12*ne13)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_mul_mat_mat_f32(kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, - uint32_t nb01, uint32_t nb02, - int32_t ne11, int32_t ne12, - uint32_t nb11, uint32_t nb12, - uint32_t nb1, uint32_t nb2) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_mat_mat_f32_comp_spv, - kp::shader_data::op_mul_mat_mat_f32_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00, ne01, ne02, ne11, ne12; - uint32_t nb01, nb02; - uint32_t nb11, nb12; - uint32_t nb1, nb2; - } pushConsts { - safe_divide(inAOff, 4), 
safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, ne01, ne02, ne11, ne12, - nb01, nb02, nb11, nb12, - nb1, nb2 - }; - - const uint32_t local_x = ggml_vk_current_device().subgroupSize; - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) { - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), - {inA, inB, out}, spirv, - {unsigned(ne01), - unsigned(ne11), - unsigned(std::max(ne12, ne02)) - }, - {local_x}, - {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({unsigned(ne01), - unsigned(ne11), - unsigned(std::max(ne12, ne02)), - }); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_mul_mat_impl( - const std::vector& spirv, const char * suffix, uint32_t block_size, kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, - int32_t ne10, int32_t ne11, int32_t ne12, int32_t ne13, - int32_t ne0, int32_t ne1, - uint32_t nb01, uint32_t nb02, uint32_t nb03, - uint32_t nb11, uint32_t nb12, uint32_t nb13, - uint32_t r2, uint32_t r3 -) { - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00, ne01, ne02; - int32_t ne10, ne12; - int32_t ne0, ne1; - uint32_t nb01, nb02, nb03; - uint32_t nb11, nb12, nb13; - uint32_t r2, r3; - } pushConsts { - safe_divide(inAOff, block_size), safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, ne01, ne02, - ne10, ne12, - ne0, ne1, - nb01, nb02, nb03, - nb11, nb12, nb13, - r2, r3 - }; - - auto name = std::string(__func__) + "_" + suffix; - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(name)) { - const uint32_t local_x = (ggml_vk_current_device().subgroupSize * 2) / 8; - s_algo = 
komputeManager()->algorithm(name, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {unsigned((ne01 + 7)/8), unsigned(ne11), unsigned(ne12*ne13)}, {local_x}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(name); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({unsigned((ne01 + 7)/8), unsigned(ne11), unsigned(ne12*ne13)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -template -static void ggml_vk_mul_mat_q4_0(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_mat_q4_0_comp_spv, - kp::shader_data::op_mul_mat_q4_0_comp_spv_len); - - ggml_vk_mul_mat_impl(spirv, "q4_0", 1/*We access blocks unaligned*/, std::forward(args)...); -} - -template -static void ggml_vk_mul_mat_q4_1(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_mat_q4_1_comp_spv, - kp::shader_data::op_mul_mat_q4_1_comp_spv_len); - - ggml_vk_mul_mat_impl(spirv, "q4_1", 1/*We access blocks unaligned*/, std::forward(args)...); -} - -template -static void ggml_vk_mul_mat_q8_0(Args&&... 
args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_mat_q8_0_comp_spv, - kp::shader_data::op_mul_mat_q8_0_comp_spv_len); - - ggml_vk_mul_mat_impl(spirv, "q8_0", 1/*We access blocks unaligned*/, std::forward(args)...); -} - -static void ggml_vk_mul_mat_q4_k( - kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, - int32_t ne10, int32_t ne11, int32_t ne12, int32_t ne13, - int32_t ne0, int32_t ne1, - uint32_t nb01, uint32_t nb02, uint32_t nb03, - uint32_t nb11, uint32_t nb12, uint32_t nb13, - uint32_t r2, uint32_t r3 -) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_mat_q4_k_comp_spv, - kp::shader_data::op_mul_mat_q4_k_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00, ne10, ne0, ne1, ne01, ne02, ne12; - uint32_t nb01, nb02, nb03, nb11, nb12, nb13; - uint32_t r2, r3; - } pushConsts { - inAOff, safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, ne10, ne0, ne1, ne01, ne02, ne12, - nb01, nb02, nb03, nb11, nb12, nb13, - r2, r3 - }; - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) { - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {unsigned((ne01 + 3)/4), unsigned(ne11), unsigned(ne12) * unsigned(ne13)}, {}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({unsigned((ne01 + 3)/4), unsigned(ne11), unsigned(ne12) * unsigned(ne13)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_mul_mat_q6_k( - kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t 
outOff, - int32_t ne00, int32_t ne01, int32_t ne02, - int32_t ne10, int32_t ne11, int32_t ne12, int32_t ne13, - int32_t ne0, int32_t ne1, - uint32_t nb01, uint32_t nb02, uint32_t nb03, - uint32_t nb11, uint32_t nb12, uint32_t nb13, - uint32_t r2, uint32_t r3 -) { - const static auto spirv = getSpirvShader(kp::shader_data::op_mul_mat_q6_k_comp_spv, - kp::shader_data::op_mul_mat_q6_k_comp_spv_len); - - struct PushConstants { - uint32_t inAOff, inBOff, outOff; - int32_t ne00, ne10, ne0, ne1, ne01, ne02, ne12; - uint32_t nb01, nb02, nb03, nb11, nb12, nb13; - uint32_t r2, r3; - } pushConsts { - inAOff, safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, ne10, ne0, ne1, ne01, ne02, ne12, - nb01, nb02, nb03, nb11, nb12, nb13, - r2, r3 - }; - - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(__func__)) { - const uint32_t local_x = 2; - const uint32_t local_y = ggml_vk_current_device().subgroupSize; - s_algo = komputeManager()->algorithm(__func__, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {unsigned((ne01 + 1)/2), unsigned(ne11), unsigned(ne12)*unsigned(ne13)}, {local_x, local_y}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(__func__); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({unsigned((ne01 + 1)/2), unsigned(ne11), unsigned(ne12)*unsigned(ne13)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_get_rows( - const std::vector& spirv, - const char * suffix, - unsigned element_size, unsigned qk, - kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t outOff, - int32_t ne00, int32_t nb01, int32_t nb1, - uint32_t size -) { - GGML_ASSERT(nb01%element_size == 0); - GGML_ASSERT(nb1%sizeof(float) == 0); - if (qk) GGML_ASSERT(ne00%qk == 0); - - struct PushConstants { - uint32_t inAOff, inBOff, 
outOff; - int32_t ne00, nb01, nb1; - } pushConsts { - safe_divide(inAOff, element_size), safe_divide(inBOff, 4), safe_divide(outOff, 4), - ne00, nb01, nb1 - }; - - auto name = std::string(__func__) + "_" + suffix; - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(name)) { - s_algo = komputeManager()->algorithm(name, s_kompute_context->pool.get(), {inA, inB, out}, spirv, {size}, {}, {pushConsts}); - } else { - s_algo = komputeManager()->getAlgorithm(name); - s_algo->setTensors({inA, inB, out}); - s_algo->setWorkgroup({size}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -template -static void ggml_vk_get_rows_f32(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_getrows_f32_comp_spv, - kp::shader_data::op_getrows_f32_comp_spv_len); - - ggml_vk_get_rows(spirv, "f32", sizeof(float), 0, std::forward(args)...); -} - -template -static void ggml_vk_get_rows_f16(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_getrows_f16_comp_spv, - kp::shader_data::op_getrows_f16_comp_spv_len); - - ggml_vk_get_rows(spirv, "f16", sizeof(half), 0, std::forward(args)...); -} - -template -static void ggml_vk_get_rows_q4_0(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_getrows_q4_0_comp_spv, - kp::shader_data::op_getrows_q4_0_comp_spv_len); - - ggml_vk_get_rows(spirv, "q4_0", 1/*We access blocks unaligned*/, QK4_0, std::forward(args)...); -} - -template -static void ggml_vk_get_rows_q4_1(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_getrows_q4_1_comp_spv, - kp::shader_data::op_getrows_q4_1_comp_spv_len); - - ggml_vk_get_rows(spirv, "q4_1", 1/*We access blocks unaligned*/, QK4_1, std::forward(args)...); -} - -template -static void ggml_vk_get_rows_q6_k(Args&&... 
args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_getrows_q6_k_comp_spv, - kp::shader_data::op_getrows_q6_k_comp_spv_len); - ggml_vk_get_rows(spirv, "q6_k", 1/*We access blocks unaligned*/, QK_NL, std::forward(args)...); -} - -static void ggml_vk_rope( - kp::Sequence& seq, - const std::shared_ptr& inA, - const std::shared_ptr& inB, - const std::shared_ptr& inC, - const std::shared_ptr& out, - uint32_t inAOff, uint32_t inBOff, uint32_t inCOff, uint32_t outOff, - ggml_type src0t, int32_t n_dims, int32_t mode, int32_t n_ctx_orig, - float freq_base, float freq_scale, bool has_freq_factors, float ext_factor, float attn_factor, float beta_fast, float beta_slow, - int32_t ne01, int32_t ne02, int32_t ne03, - uint32_t nb00, uint32_t nb01, uint32_t nb02, uint32_t nb03, - int32_t ne0, - uint32_t nb0, uint32_t nb1, uint32_t nb2, uint32_t nb3 -) { - GGML_ASSERT(src0t == GGML_TYPE_F16 || src0t == GGML_TYPE_F32); - - static const auto spirv_norm_f16 = getSpirvShader( - kp::shader_data::op_rope_norm_f16_comp_spv, kp::shader_data::op_rope_norm_f16_comp_spv_len - ); - static const auto spirv_norm_f32 = getSpirvShader( - kp::shader_data::op_rope_norm_f32_comp_spv, kp::shader_data::op_rope_norm_f32_comp_spv_len - ); - static const auto spirv_neox_f16 = getSpirvShader( - kp::shader_data::op_rope_neox_f16_comp_spv, kp::shader_data::op_rope_neox_f16_comp_spv_len - ); - static const auto spirv_neox_f32 = getSpirvShader( - kp::shader_data::op_rope_neox_f32_comp_spv, kp::shader_data::op_rope_neox_f32_comp_spv_len - ); - - int type_size = src0t == GGML_TYPE_F16 ? 
2 : 4; - - GGML_ASSERT(nb03 % type_size == 0); - GGML_ASSERT(nb02 % type_size == 0); - GGML_ASSERT(nb01 % type_size == 0); - GGML_ASSERT(nb00 % type_size == 0); - GGML_ASSERT(nb3 % type_size == 0); - GGML_ASSERT(nb2 % type_size == 0); - GGML_ASSERT(nb1 % type_size == 0); - GGML_ASSERT(nb0 % type_size == 0); - - struct PushConstants { - uint32_t inAOff, inBOff, inCOff, outOff; - int32_t n_dims, mode, n_ctx_orig; - float freq_base, freq_scale; - bool has_freq_factors; - float ext_factor, attn_factor, beta_fast, beta_slow; - uint32_t nb00, nb01, nb02, nb03; - int32_t ne0; - uint32_t nb0, nb1, nb2, nb3; - } pushConsts { - safe_divide(inAOff, type_size), safe_divide(inBOff, 4), safe_divide(inCOff, type_size), safe_divide(outOff, type_size), - n_dims, mode, n_ctx_orig, - freq_base, freq_scale, - has_freq_factors, - ext_factor, attn_factor, beta_fast, beta_slow, - nb00, nb01, nb02, nb03, - ne0, - nb0, nb1, nb2, nb3 - }; - - auto & inC_ = inC ? inC : inA; - const bool is_neox = mode & GGML_ROPE_TYPE_NEOX; - const bool is_f16 = src0t == GGML_TYPE_F16; - - auto name = std::string(__func__) + (is_neox ? "_neox" : "_norm") + (src0t == GGML_TYPE_F16 ? "_f16" : "_f32"); - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(name)) { - auto & spirv = is_neox ? is_f16 ? spirv_neox_f16 : spirv_neox_f32 : is_f16 ? 
spirv_norm_f16 : spirv_norm_f32; - s_algo = komputeManager()->algorithm( - name, s_kompute_context->pool.get(), {inA, inB, inC_, out}, spirv, - {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts} - ); - } else { - s_algo = komputeManager()->getAlgorithm(name); - s_algo->setTensors({inA, inB, inC_, out}); - s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - seq.record(s_algo); -} - -static void ggml_vk_cpy( - const std::vector& spirv, - uint32_t in_element_size, uint32_t out_element_size, - kp::Sequence& seq, - const std::shared_ptr& in, - const std::shared_ptr& out, - uint32_t inOff, uint32_t outOff, - int32_t ne00, int32_t ne01, int32_t ne02, int32_t ne03, - uint32_t nb00, uint32_t nb01, uint32_t nb02, uint32_t nb03, - int32_t ne0, int32_t ne1, int32_t ne2, - uint32_t nb0, uint32_t nb1, uint32_t nb2, uint32_t nb3 -) { - struct PushConstants { - uint32_t inOff, outOff; - int32_t ne00, ne01, ne02; - uint32_t nb00, nb01, nb02, nb03; - int32_t ne0, ne1, ne2; - uint32_t nb0, nb1, nb2, nb3; - } pushConsts { - safe_divide(inOff, in_element_size), safe_divide(outOff, out_element_size), - ne00, ne01, ne02, - nb00, nb01, nb02, nb03, - ne0, ne1, ne2, - nb0, nb1, nb2, nb3 - }; - - std::string name = std::string(__func__) - + "_i_" + std::to_string(in_element_size) - + "_o_" + std::to_string(out_element_size); - std::shared_ptr s_algo = nullptr; - if (!komputeManager()->hasAlgorithm(name)) - s_algo = komputeManager()->algorithm(name, s_kompute_context->pool.get(), {in, out}, spirv, {unsigned(ne01), unsigned(ne02), unsigned(ne03)}, {}, {pushConsts}); - else { - s_algo = komputeManager()->getAlgorithm(name); - s_algo->setTensors({in, out}); - s_algo->setWorkgroup({unsigned(ne01), unsigned(ne02), unsigned(ne03)}); - s_algo->setPushConstants({pushConsts}); - s_algo->updateDescriptors(s_kompute_context->pool.get()); - } - 
seq.record(s_algo); -} - -template -static void ggml_vk_cpy_f32_f16(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_cpy_f32_f16_comp_spv, - kp::shader_data::op_cpy_f32_f16_comp_spv_len); - ggml_vk_cpy(spirv, 4, 2, std::forward(args)...); -} - -template -static void ggml_vk_cpy_f32_f32(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_cpy_f32_f32_comp_spv, - kp::shader_data::op_cpy_f32_f32_comp_spv_len); - ggml_vk_cpy(spirv, 4, 4, std::forward(args)...); -} - -template -static void ggml_vk_cpy_f16_f16(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_cpy_f16_f16_comp_spv, - kp::shader_data::op_cpy_f16_f16_comp_spv_len); - ggml_vk_cpy(spirv, 2, 2, std::forward(args)...); -} - -template -static void ggml_vk_cpy_f16_f32(Args&&... args) { - const static auto spirv = getSpirvShader(kp::shader_data::op_cpy_f16_f32_comp_spv, - kp::shader_data::op_cpy_f16_f32_comp_spv_len); - ggml_vk_cpy(spirv, 2, 4, std::forward(args)...); -} - -static bool ggml_backend_kompute_device_supports_op(ggml_backend_dev_t dev, const struct ggml_tensor * op) { - int64_t n = ggml_nelements(op); - switch (op->op) { - case GGML_OP_UNARY: - if (n % 4 != 0) return false; - switch (ggml_get_unary_op(op)) { - case GGML_UNARY_OP_GELU: - if (n % 8 != 0) return false; - // fall through - case GGML_UNARY_OP_RELU: - case GGML_UNARY_OP_SILU: - return ggml_is_contiguous(op->src[0]); - default: - ; - } - break; - case GGML_OP_NONE: - case GGML_OP_RESHAPE: - case GGML_OP_VIEW: - case GGML_OP_TRANSPOSE: - case GGML_OP_PERMUTE: - case GGML_OP_ADD: - case GGML_OP_MUL: - case GGML_OP_SCALE: - case GGML_OP_SOFT_MAX: - case GGML_OP_RMS_NORM: - case GGML_OP_NORM: - return true; - case GGML_OP_ROPE: - { - const int mode = ((const int32_t *) op->op_params)[2]; - if (mode & GGML_ROPE_TYPE_MROPE) { - return false; - } - if (mode & GGML_ROPE_TYPE_VISION) { - return false; - } - return true; - } - case GGML_OP_DUP: - case 
GGML_OP_CPY: - case GGML_OP_CONT: - switch (op->src[0]->type) { - case GGML_TYPE_F32: - case GGML_TYPE_F16: - break; - default: - return false; - } - switch (op->type) { - case GGML_TYPE_F32: - case GGML_TYPE_F16: - break; - default: - return false; - } - return true; - case GGML_OP_DIAG_MASK_INF: - return op->ne[3] == 1; - case GGML_OP_GET_ROWS: - switch (op->src[0]->type) { - case GGML_TYPE_F32: - case GGML_TYPE_F16: - case GGML_TYPE_Q4_0: - case GGML_TYPE_Q4_1: - case GGML_TYPE_Q6_K: - return op->ne[2] == 1 && op->ne[3] == 1; - default: - ; - } - return false; - case GGML_OP_MUL_MAT: - if (op->src[1]->type != GGML_TYPE_F32 || ggml_is_transposed(op->src[0]) || ggml_is_transposed(op->src[1])) - return false; - - switch (op->src[0]->type) { - case GGML_TYPE_F32: - return op->ne[3] == 1; - case GGML_TYPE_Q6_K: - case GGML_TYPE_F16: - case GGML_TYPE_Q8_0: - case GGML_TYPE_Q4_0: - case GGML_TYPE_Q4_1: - case GGML_TYPE_Q4_K: - return true; - default: - ; - } - default: - ; - } - return false; - - GGML_UNUSED(dev); -} - -static void ggml_vk_graph_compute(struct ggml_kompute_context * ctx, struct ggml_cgraph * gf) { - const int n_seq = 8; - - // FIXME: Figure out if we can somehow optimize the size of the pool... right now we're setting - // it to the size of the graph, but I think it can be made smaller? - ggml_vk_allocate_descriptor_pool(ctx, gf->n_nodes); - - std::vector> sequences(n_seq); - - for (auto& sequence : sequences) { - sequence = komputeManager()->sequence(); - } - for (int seq_idx = 0; seq_idx < n_seq; ++seq_idx) { - const int n_nodes_per_seq = (gf->n_nodes + n_seq - 1) / n_seq; - - auto& seq = *sequences[seq_idx]; - - const int node_start = (seq_idx + 0) * n_nodes_per_seq; - const int node_end = std::min((seq_idx == n_seq - 1) ? 
gf->n_nodes : (seq_idx + 1) * n_nodes_per_seq, gf->n_nodes); - - bool any_commands_recorded = false; - - for (int i = node_start; i < node_end; ++i) { - struct ggml_tensor * src0 = gf->nodes[i]->src[0]; - struct ggml_tensor * src1 = gf->nodes[i]->src[1]; - struct ggml_tensor * src2 = gf->nodes[i]->src[2]; GGML_UNUSED(src2); - struct ggml_tensor * dst = gf->nodes[i]; - GGML_ASSERT(dst->data != nullptr); - - if (ggml_is_empty(dst)) { - continue; - } - - switch (dst->op) { - case GGML_OP_NONE: - case GGML_OP_RESHAPE: - case GGML_OP_VIEW: - case GGML_OP_TRANSPOSE: - case GGML_OP_PERMUTE: - continue; // noop -> next node - default: - break; - } - - any_commands_recorded = true; - - const int32_t ne00 = src0 ? src0->ne[0] : 0; - const int32_t ne01 = src0 ? src0->ne[1] : 0; - const int32_t ne02 = src0 ? src0->ne[2] : 0; - const int32_t ne03 = src0 ? src0->ne[3] : 0; - - const uint32_t nb00 = src0 ? src0->nb[0] : 0; - const uint32_t nb01 = src0 ? src0->nb[1] : 0; - const uint32_t nb02 = src0 ? src0->nb[2] : 0; - const uint32_t nb03 = src0 ? src0->nb[3] : 0; - - const int32_t ne10 = src1 ? src1->ne[0] : 0; - const int32_t ne11 = src1 ? src1->ne[1] : 0; - const int32_t ne12 = src1 ? src1->ne[2] : 0; - const int32_t ne13 = src1 ? src1->ne[3] : 0; - - const uint32_t nb10 = src1 ? src1->nb[0] : 0; - const uint32_t nb11 = src1 ? src1->nb[1] : 0; - const uint32_t nb12 = src1 ? src1->nb[2] : 0; - const uint32_t nb13 = src1 ? src1->nb[3] : 0; - - const int32_t ne0 = dst ? dst->ne[0] : 0; - const int32_t ne1 = dst ? dst->ne[1] : 0; - const int32_t ne2 = dst ? dst->ne[2] : 0; -// const int32_t ne3 = dst ? dst->ne[3] : 0; - - const uint32_t nb0 = dst ? dst->nb[0] : 0; - const uint32_t nb1 = dst ? dst->nb[1] : 0; - const uint32_t nb2 = dst ? dst->nb[2] : 0; - const uint32_t nb3 = dst ? dst->nb[3] : 0; - - const enum ggml_type src0t = src0 ? src0->type : GGML_TYPE_COUNT; - const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT; - const enum ggml_type dstt = dst ? 
dst->type : GGML_TYPE_COUNT; - - const static std::shared_ptr nullTensor = nullptr; - uint32_t off_src0 = 0; - uint32_t off_src1 = 0; - uint32_t off_src2 = 0; - uint32_t off_dst = 0; - const std::shared_ptr& id_src0 = src0 ? ggml_vk_get_tensor(src0, &off_src0) : nullTensor; - const std::shared_ptr& id_src1 = src1 ? ggml_vk_get_tensor(src1, &off_src1) : nullTensor; - const std::shared_ptr& id_src2 = src2 ? ggml_vk_get_tensor(src2, &off_src2) : nullTensor; - const std::shared_ptr& id_dst = dst ? ggml_vk_get_tensor(dst, &off_dst) : nullTensor; - - switch (dst->op) { - case GGML_OP_ADD: - { - if (ggml_nelements(src1) == ne10 && ggml_is_contiguous(src1) && ne00 % 4 == 0 && ne10 % 4 == 0) { - // src1 is a row - ggml_vk_addrow(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ggml_nelements(dst)/4, ne00); - } else { - ggml_vk_add( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, ne03, - nb00, nb01, nb02, nb03, - ne10, ne11, ne12, ne13, - nb10, nb11, nb12, nb13, - ne0, - nb0, nb1, nb2, nb3 - ); - } - } break; - case GGML_OP_MUL: - { - ggml_vk_mul( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, ne03, - nb00, nb01, nb02, nb03, - ne10, ne11, ne12, ne13, - nb10, nb11, nb12, nb13, - ne0, - nb0, nb1, nb2, nb3 - ); - } break; - case GGML_OP_SCALE: - { - float scale; memcpy(&scale, dst->op_params, sizeof(float)); - - ggml_vk_scale(seq, id_src0, id_dst, off_src0, off_dst, ggml_nelements(dst), scale); - } break; - case GGML_OP_UNARY: - { - int64_t n = ggml_nelements(dst); - GGML_ASSERT(n % 4 == 0); - switch (ggml_get_unary_op(gf->nodes[i])) { - case GGML_UNARY_OP_SILU: - { - ggml_vk_silu(seq, id_src0, id_dst, off_src0, off_dst, n/4); - } break; - case GGML_UNARY_OP_RELU: - { - ggml_vk_relu(seq, id_src0, id_dst, off_src0, off_dst, n/4); - } break; - case GGML_UNARY_OP_GELU: - { - GGML_ASSERT(n % 8 == 0); - ggml_vk_gelu(seq, id_src0, id_dst, off_src0, off_dst, n/8); - } break; - default: - { - 
fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - GGML_ABORT("fatal error"); - } - } - } break; - case GGML_OP_SOFT_MAX: - { - float scale; - float max_bias; - - memcpy(&scale, (float *)dst->op_params + 0, sizeof(float)); - memcpy(&max_bias, (float *)dst->op_params + 1, sizeof(float)); - -#pragma message("TODO: add ggml_vk_soft_max() F16 src1 support") -#pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5021") - GGML_ASSERT(!src1 || src1t == GGML_TYPE_F32); - - const int64_t nrows_x = ggml_nrows(src0); - const int64_t nrows_y = src0->ne[1]; - - const uint32_t n_head = nrows_x/nrows_y; - const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head)); - - const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); - const float m1 = powf(2.0f, -(max_bias / 2.0f) / n_head_log2); - - ggml_vk_soft_max(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, ne01, ne02, ne03, scale, max_bias, m0, m1, n_head_log2); - } break; - case GGML_OP_DIAG_MASK_INF: - { - const int n_past = ((int32_t *)(dst->op_params))[0]; - ggml_vk_diag_mask_inf(seq, id_src0, id_dst, off_src0, off_dst, n_past, ne00, ne01, ne02); - } break; - case GGML_OP_NORM: - { - float eps; - memcpy(&eps, dst->op_params, sizeof(float)); - ggml_vk_norm(seq, id_src0, id_dst, off_src0, off_dst, ne00, nb01, ggml_nrows(src0), eps); - } break; - case GGML_OP_RMS_NORM: - { - GGML_ASSERT(ne00 % 4 == 0); - - float eps; - memcpy(&eps, dst->op_params, sizeof(float)); - ggml_vk_rms_norm(seq, id_src0, id_dst, off_src0, off_dst, ne00, nb01, ggml_nrows(src0), eps); - } break; - case GGML_OP_MUL_MAT: - { - GGML_ASSERT(ne00 == ne10); - - GGML_ASSERT(ne12 % ne02 == 0); - GGML_ASSERT(ne13 % ne03 == 0); - - const uint32_t r2 = ne12/ne02; - const uint32_t r3 = ne13/ne03; - - if (src1t != GGML_TYPE_F32) { - fprintf(stderr, "%s: %s: Unsupported src1 type: %u/%u\n", __func__, ggml_op_name(dst->op), src0t, src1t); - goto not_implemented; - } - - if 
(ggml_is_transposed(src0) || - ggml_is_transposed(src1)) { - fprintf(stderr, "%s: %s: matmul on tranposed tensor not supported: %u/%u\n", __func__, ggml_op_name(dst->op), src0t, src1t); - goto not_implemented; - } - - switch (src0t) { - case GGML_TYPE_F32: - ggml_vk_mul_mat_mat_f32( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, nb01, nb02, ne11, ne12, nb11, nb12, nb1, nb2 - ); - break; - case GGML_TYPE_F16: - ggml_vk_mul_mat_f16( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, nb00, nb01, nb02, nb03, - ne10, ne11, ne12, ne13, nb10, nb11, nb12, nb13, - ne0, ne1, r2, r3 - ); - break; - case GGML_TYPE_Q8_0: - ggml_vk_mul_mat_q8_0( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, ne10, ne11, ne12, ne13, ne0, ne1, - nb01, nb02, nb03, nb11, nb12, nb13, r2, r3 - ); - break; - case GGML_TYPE_Q4_0: - ggml_vk_mul_mat_q4_0( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, ne10, ne11, ne12, ne13, ne0, ne1, - nb01, nb02, nb03, nb11, nb12, nb13, r2, r3 - ); - break; - case GGML_TYPE_Q4_1: - ggml_vk_mul_mat_q4_1( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, ne10, ne11, ne12, ne13, ne0, ne1, - nb01, nb02, nb03, nb11, nb12, nb13, r2, r3 - ); - break; - case GGML_TYPE_Q4_K: - ggml_vk_mul_mat_q4_k( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, ne10, ne11, ne12, ne13, ne0, ne1, - nb01, nb02, nb03, nb11, nb12, nb13, r2, r3 - ); - break; - case GGML_TYPE_Q6_K: - ggml_vk_mul_mat_q6_k( - seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, - ne00, ne01, ne02, ne10, ne11, ne12, ne13, ne0, ne1, - nb01, nb02, nb03, nb11, nb12, nb13, r2, r3 - ); - break; - default: { - fprintf(stderr, "%s: %s: Unsupported quantization: %u/%u\n", __func__, ggml_op_name(dst->op), src0t, src1t); - goto not_implemented; - } - } - - } break; - case GGML_OP_GET_ROWS: - { - if (src0t == GGML_TYPE_F32) { 
- ggml_vk_get_rows_f32(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, nb01, nb1, ggml_nelements(src1)); - } else if (src0t == GGML_TYPE_F16) { - ggml_vk_get_rows_f16(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, nb01, nb1, ggml_nelements(src1)); - } else if (src0t == GGML_TYPE_Q4_0) { - ggml_vk_get_rows_q4_0(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, nb01, nb1, ggml_nelements(src1)); - } else if (src0t == GGML_TYPE_Q4_1) { - ggml_vk_get_rows_q4_1(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, nb01, nb1, ggml_nelements(src1)); - } else if (src0t == GGML_TYPE_Q6_K) { - ggml_vk_get_rows_q6_k(seq, id_src0, id_src1, id_dst, off_src0, off_src1, off_dst, ne00, nb01, nb1, ggml_nelements(src1)); - } else { - fprintf(stderr, "%s: %s: Unsupported quantization: %u\n", __func__, ggml_op_name(dst->op), src0t); - goto not_implemented; - } - } break; - case GGML_OP_ROPE: - { - GGML_ASSERT(ne10 == ne02); - GGML_ASSERT(src0t == dstt); - // const int n_past = ((int32_t *) dst->op_params)[0]; - const int n_dims = ((int32_t *) dst->op_params)[1]; - const int mode = ((int32_t *) dst->op_params)[2]; - // skip 3, n_ctx used in GLM RoPE, unimplemented in Vulkan - const int n_ctx_orig = ((int32_t *) dst->op_params)[4]; - - const bool has_freq_factors = dst->src[2] != nullptr; - - float freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow; - memcpy(&freq_base, (int32_t *) dst->op_params + 5, sizeof(float)); - memcpy(&freq_scale, (int32_t *) dst->op_params + 6, sizeof(float)); - memcpy(&ext_factor, (int32_t *) dst->op_params + 7, sizeof(float)); - memcpy(&attn_factor, (int32_t *) dst->op_params + 8, sizeof(float)); - memcpy(&beta_fast, (int32_t *) dst->op_params + 9, sizeof(float)); - memcpy(&beta_slow, (int32_t *) dst->op_params + 10, sizeof(float)); - ggml_vk_rope( - seq, id_src0, id_src1, id_src2, id_dst, off_src0, off_src1, off_src2, off_dst, src0t, n_dims, mode, n_ctx_orig, - freq_base, 
freq_scale, has_freq_factors, ext_factor, attn_factor, beta_fast, beta_slow, - ne01, ne02, ne03, nb00, nb01, nb02, nb03, ne0, nb0, nb1, nb2, nb3 - ); - } break; - case GGML_OP_DUP: - case GGML_OP_CPY: - case GGML_OP_CONT: - { - switch (src0t) { - case GGML_TYPE_F32: - { - switch (dstt) { - case GGML_TYPE_F16: ggml_vk_cpy_f32_f16(seq, id_src0, id_dst, off_src0, off_dst, ne00, ne01, ne02, ne03, nb00, nb01, nb02, nb03, ne0, ne1, ne2, nb0, nb1, nb2, nb3); break; - case GGML_TYPE_F32: ggml_vk_cpy_f32_f32(seq, id_src0, id_dst, off_src0, off_dst, ne00, ne01, ne02, ne03, nb00, nb01, nb02, nb03, ne0, ne1, ne2, nb0, nb1, nb2, nb3); break; - default: goto not_implemented; - } - } break; - case GGML_TYPE_F16: - { - switch (dstt) { - case GGML_TYPE_F16: ggml_vk_cpy_f16_f16(seq, id_src0, id_dst, off_src0, off_dst, ne00, ne01, ne02, ne03, nb00, nb01, nb02, nb03, ne0, ne1, ne2, nb0, nb1, nb2, nb3); break; - case GGML_TYPE_F32: ggml_vk_cpy_f16_f32(seq, id_src0, id_dst, off_src0, off_dst, ne00, ne01, ne02, ne03, nb00, nb01, nb02, nb03, ne0, ne1, ne2, nb0, nb1, nb2, nb3); break; - default: goto not_implemented; - } break; - default: goto not_implemented; - } - } - } break; - default: goto not_implemented; - } - continue; - not_implemented: {} - fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); - //GGML_ABORT("fatal error"); - } - - // Evaluate sequence - if (any_commands_recorded) { - seq.evalAsync(); - } - } - - // Wait for all sequences to finish - for (auto& sequence : sequences) { - if (sequence->isRunning()) - sequence->evalAwait(); - } - - ggml_vk_free_descriptor_pool(ctx); -} - -template<> -kp::Tensor::TensorDataTypes -kp::TensorT::dataType() -{ - return TensorDataTypes::eFloat; -} - -template<> -kp::Tensor::TensorDataTypes -kp::TensorT::dataType() -{ - return TensorDataTypes::eUnsignedInt; -} - -//////////////////////////////////////////////////////////////////////////////// - -// backend interface - -struct 
ggml_backend_kompute_buffer_type_context { - int device; - int device_ref = 0; - uint64_t buffer_alignment; - uint64_t max_alloc; - std::string name; - - ggml_backend_kompute_buffer_type_context(int device, uint64_t buffer_alignment, uint64_t max_alloc) - : device(device), buffer_alignment(buffer_alignment), max_alloc(max_alloc), name(ggml_kompute_format_name(device)) {} -}; - -static void ggml_backend_kompute_device_ref(ggml_backend_buffer_type_t buft) { - auto * ctx = static_cast(buft->context); - - if (!ctx->device_ref) { - komputeManager()->initializeDevice( - ctx->device, {}, { - "VK_KHR_shader_float16_int8", "VK_KHR_8bit_storage", - "VK_KHR_16bit_storage", "VK_KHR_shader_non_semantic_info" - } - ); - } - - assert(ggml_vk_has_device()); - ctx->device_ref++; -} - -static void ggml_backend_kompute_device_unref(ggml_backend_buffer_type_t buft) { - auto * ctx = static_cast(buft->context); - - assert(ctx->device_ref > 0); - - ctx->device_ref--; - - if (!ctx->device_ref) { - komputeManager.destroy(); - } -} - -static void ggml_backend_kompute_buffer_free_buffer(ggml_backend_buffer_t buffer) { - auto * memory = (ggml_vk_memory *)buffer->context; - if (ggml_vk_has_device()) { - ggml_vk_free_memory(*memory); - } - delete memory; -} - -static void * ggml_backend_kompute_buffer_get_base(ggml_backend_buffer_t buffer) { - return ((ggml_vk_memory *)buffer->context)->data; -} - -static void ggml_backend_kompute_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { - GGML_UNUSED(buffer); - - const auto res = ggml_vk_get_tensor(tensor); - GGML_ASSERT(res); - - memcpy((char *)tensor->data + offset, data, size); - - komputeManager()->sequence()->eval({res}); -} - -static void ggml_backend_kompute_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { - GGML_UNUSED(buffer); - - const auto res = ggml_vk_get_tensor(tensor); - GGML_ASSERT(res); - - 
komputeManager()->sequence()->eval({res}); - - memcpy(data, (const char *)tensor->data + offset, size); -} - -static void ggml_backend_kompute_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { - auto * memory = (ggml_vk_memory *)buffer->context; - memset(memory->data, value, buffer->size); - - if (memory->stagingBuffer) - komputeManager()->sequence()->eval(memory->primaryBuffer, memory->stagingBuffer, memory->size); -} - -static ggml_backend_buffer_i ggml_backend_kompute_buffer_i = { - /* .free_buffer = */ ggml_backend_kompute_buffer_free_buffer, - /* .get_base = */ ggml_backend_kompute_buffer_get_base, - /* .init_tensor = */ NULL, - /* .memset_tensor = */ NULL, - /* .set_tensor = */ ggml_backend_kompute_buffer_set_tensor, - /* .get_tensor = */ ggml_backend_kompute_buffer_get_tensor, - /* .cpy_tensor = */ NULL, - /* .clear = */ ggml_backend_kompute_buffer_clear, - /* .reset = */ NULL, -}; - -// default buffer type - -static const char * ggml_backend_kompute_buffer_type_get_name(ggml_backend_buffer_type_t buft) { - auto * ctx = static_cast(buft->context); - return ctx->name.c_str(); -} - -static ggml_backend_buffer_t ggml_backend_kompute_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { - ggml_backend_kompute_device_ref(buft); - auto * ctx = new ggml_vk_memory(ggml_vk_allocate(size)); - return ggml_backend_buffer_init(buft, ggml_backend_kompute_buffer_i, ctx, size); -} - -static size_t ggml_backend_kompute_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { - auto * ctx = static_cast(buft->context); - return ctx->buffer_alignment; -} - -static size_t ggml_backend_vk_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { - auto * ctx = static_cast(buft->context); - return ctx->max_alloc; -} - -static ggml_backend_buffer_type_i ggml_backend_kompute_buffer_type_interface = { - /* .get_name = */ ggml_backend_kompute_buffer_type_get_name, - /* .alloc_buffer = */ ggml_backend_kompute_buffer_type_alloc_buffer, - /* 
.get_alignment = */ ggml_backend_kompute_buffer_type_get_alignment, - /* .get_max_size = */ ggml_backend_vk_buffer_type_get_max_size, - /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes - /* .is_host = */ NULL, -}; - -ggml_backend_buffer_type_t ggml_backend_kompute_buffer_type(int device) { - static std::mutex mutex; - std::lock_guard lock(mutex); - - auto devices = ggml_vk_available_devices(); - int32_t device_count = (int32_t) devices.size(); - GGML_ASSERT(device < device_count); - GGML_ASSERT(devices.size() <= GGML_KOMPUTE_MAX_DEVICES); - - static ggml_backend_buffer_type - ggml_backend_kompute_buffer_types[GGML_KOMPUTE_MAX_DEVICES]; - - static bool ggml_backend_kompute_buffer_type_initialized = false; - - if (!ggml_backend_kompute_buffer_type_initialized) { - for (int32_t i = 0; i < device_count; i++) { - ggml_backend_kompute_buffer_types[i] = { - /* .iface = */ ggml_backend_kompute_buffer_type_interface, - /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), i), - /* .context = */ new ggml_backend_kompute_buffer_type_context{ i, devices[i].bufferAlignment, devices[i].maxAlloc }, - }; - } - ggml_backend_kompute_buffer_type_initialized = true; - } - - return &ggml_backend_kompute_buffer_types[device]; -} - -// backend - -static const char * ggml_backend_kompute_name(ggml_backend_t backend) { - auto * ctx = static_cast(backend->context); - return ctx->name.c_str(); -} - -static void ggml_backend_kompute_free(ggml_backend_t backend) { - auto * ctx = static_cast(backend->context); - - assert(ctx == s_kompute_context); - s_kompute_context = nullptr; - if (ctx != nullptr) { - delete ctx; - } - - delete backend; -} - -static ggml_status ggml_backend_kompute_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { - auto * ctx = static_cast(backend->context); - ggml_vk_graph_compute(ctx, cgraph); - return GGML_STATUS_SUCCESS; -} - -static struct ggml_backend_i kompute_backend_i = { - /* .get_name = */ ggml_backend_kompute_name, - /* 
.free = */ ggml_backend_kompute_free, - /* .set_tensor_async = */ NULL, - /* .get_tensor_async = */ NULL, - /* .cpy_tensor_async = */ NULL, - /* .synchronize = */ NULL, - /* .graph_plan_create = */ NULL, - /* .graph_plan_free = */ NULL, - /* .graph_plan_update = */ NULL, - /* .graph_plan_compute = */ NULL, - /* .graph_compute = */ ggml_backend_kompute_graph_compute, - /* .event_record = */ NULL, - /* .event_wait = */ NULL, -}; - -static ggml_guid_t ggml_backend_kompute_guid() { - static ggml_guid guid = { 0x7b, 0x57, 0xdc, 0xaf, 0xde, 0x12, 0x1d, 0x49, 0xfb, 0x35, 0xfa, 0x9b, 0x18, 0x31, 0x1d, 0xca }; - return &guid; -} - -ggml_backend_t ggml_backend_kompute_init(int device) { - GGML_ASSERT(s_kompute_context == nullptr); - s_kompute_context = new ggml_kompute_context(device); - - ggml_backend_t kompute_backend = new ggml_backend { - /* .guid = */ ggml_backend_kompute_guid(), - /* .interface = */ kompute_backend_i, - /* .device = */ ggml_backend_reg_dev_get(ggml_backend_kompute_reg(), device), - /* .context = */ s_kompute_context, - }; - - return kompute_backend; -} - -bool ggml_backend_is_kompute(ggml_backend_t backend) { - return backend != NULL && ggml_guid_matches(backend->guid, ggml_backend_kompute_guid()); -} - -static size_t ggml_backend_kompute_get_device_count() { - auto devices = ggml_vk_available_devices(); - return devices.size(); -} - -static void ggml_backend_kompute_get_device_description(int device, char * description, size_t description_size) { - auto devices = ggml_vk_available_devices(); - GGML_ASSERT((size_t) device < devices.size()); - snprintf(description, description_size, "%s", devices[device].name); -} - -static void ggml_backend_kompute_get_device_memory(int device, size_t * free, size_t * total) { - auto devices = ggml_vk_available_devices(); - GGML_ASSERT((size_t) device < devices.size()); - *total = devices[device].heapSize; - *free = devices[device].heapSize; -} - -////////////////////////// - -struct ggml_backend_kompute_device_context 
{ - int device; - std::string name; - std::string description; -}; - -static const char * ggml_backend_kompute_device_get_name(ggml_backend_dev_t dev) { - ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; - return ctx->name.c_str(); -} - -static const char * ggml_backend_kompute_device_get_description(ggml_backend_dev_t dev) { - ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; - return ctx->description.c_str(); -} - -static void ggml_backend_kompute_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { - ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; - ggml_backend_kompute_get_device_memory(ctx->device, free, total); -} - -static ggml_backend_buffer_type_t ggml_backend_kompute_device_get_buffer_type(ggml_backend_dev_t dev) { - ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; - return ggml_backend_kompute_buffer_type(ctx->device); -} - -static bool ggml_backend_kompute_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { - if (buft->iface.get_name != ggml_backend_kompute_buffer_type_get_name) { - return false; - } - - ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; - ggml_backend_kompute_buffer_type_context * buft_ctx = (ggml_backend_kompute_buffer_type_context *)buft->context; - - return buft_ctx->device == ctx->device; -} - -static enum ggml_backend_dev_type ggml_backend_kompute_device_get_type(ggml_backend_dev_t dev) { - GGML_UNUSED(dev); - return GGML_BACKEND_DEVICE_TYPE_GPU; -} - -static void ggml_backend_kompute_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { - props->name = ggml_backend_kompute_device_get_name(dev); - props->description = ggml_backend_kompute_device_get_description(dev); - props->type = 
ggml_backend_kompute_device_get_type(dev); - ggml_backend_kompute_device_get_memory(dev, &props->memory_free, &props->memory_total); - props->caps = { - /* async = */ false, - /* host_buffer = */ false, - /* .buffer_from_host_ptr = */ false, - /* events = */ false, - }; -} - -static ggml_backend_t ggml_backend_kompute_device_init(ggml_backend_dev_t dev, const char * params) { - GGML_UNUSED(params); - ggml_backend_kompute_device_context * ctx = (ggml_backend_kompute_device_context *)dev->context; - return ggml_backend_kompute_init(ctx->device); -} - -static bool ggml_backend_kompute_device_offload_op(ggml_backend_dev_t dev, const ggml_tensor * op) { - const int min_batch_size = 32; - - return (op->ne[1] >= min_batch_size && op->op != GGML_OP_GET_ROWS) || - (op->ne[2] >= min_batch_size && op->op == GGML_OP_MUL_MAT_ID); - - GGML_UNUSED(dev); -} - -static const struct ggml_backend_device_i ggml_backend_kompute_device_i = { - /* .get_name = */ ggml_backend_kompute_device_get_name, - /* .get_description = */ ggml_backend_kompute_device_get_description, - /* .get_memory = */ ggml_backend_kompute_device_get_memory, - /* .get_type = */ ggml_backend_kompute_device_get_type, - /* .get_props = */ ggml_backend_kompute_device_get_props, - /* .init_backend = */ ggml_backend_kompute_device_init, - /* .get_buffer_type = */ ggml_backend_kompute_device_get_buffer_type, - /* .get_host_buffer_type = */ NULL, - /* .buffer_from_host_ptr = */ NULL, - /* .supports_op = */ ggml_backend_kompute_device_supports_op, - /* .supports_buft = */ ggml_backend_kompute_device_supports_buft, - /* .offload_op = */ ggml_backend_kompute_device_offload_op, - /* .event_new = */ NULL, - /* .event_free = */ NULL, - /* .event_synchronize = */ NULL, -}; - -static const char * ggml_backend_kompute_reg_get_name(ggml_backend_reg_t reg) { - GGML_UNUSED(reg); - return "Kompute"; -} - -static size_t ggml_backend_kompute_reg_get_device_count(ggml_backend_reg_t reg) { - GGML_UNUSED(reg); - return 
ggml_backend_kompute_get_device_count(); -} - -static ggml_backend_dev_t ggml_backend_kompute_reg_get_device(ggml_backend_reg_t reg, size_t device) { - static std::vector devices; - - static bool initialized = false; - - { - static std::mutex mutex; - std::lock_guard lock(mutex); - if (!initialized) { - for (size_t i = 0; i < ggml_backend_kompute_get_device_count(); i++) { - ggml_backend_kompute_device_context * ctx = new ggml_backend_kompute_device_context; - char desc[256]; - ggml_backend_kompute_get_device_description(i, desc, sizeof(desc)); - ctx->device = i; - ctx->name = "Kompute" + std::to_string(i); - ctx->description = desc; - devices.push_back(new ggml_backend_device { - /* .iface = */ ggml_backend_kompute_device_i, - /* .reg = */ reg, - /* .context = */ ctx, - }); - } - initialized = true; - } - } - - GGML_ASSERT(device < devices.size()); - return devices[device]; -} - -static const struct ggml_backend_reg_i ggml_backend_kompute_reg_i = { - /* .get_name = */ ggml_backend_kompute_reg_get_name, - /* .get_device_count = */ ggml_backend_kompute_reg_get_device_count, - /* .get_device = */ ggml_backend_kompute_reg_get_device, - /* .get_proc_address = */ NULL, -}; - -ggml_backend_reg_t ggml_backend_kompute_reg() { - static ggml_backend_reg reg = { - /* .api_version = */ GGML_BACKEND_API_VERSION, - /* .iface = */ ggml_backend_kompute_reg_i, - /* .context = */ nullptr, - }; - - return ® -} - -GGML_BACKEND_DL_IMPL(ggml_backend_kompute_reg) diff --git a/ggml/src/ggml-kompute/kompute b/ggml/src/ggml-kompute/kompute deleted file mode 160000 index 4565194ed7c32..0000000000000 --- a/ggml/src/ggml-kompute/kompute +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4565194ed7c32d1d2efa32ceab4d3c6cae006306 diff --git a/ggml/src/ggml-kompute/kompute-shaders/common.comp b/ggml/src/ggml-kompute/kompute-shaders/common.comp deleted file mode 100644 index dbe4cf804e6c0..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/common.comp +++ /dev/null @@ -1,112 +0,0 @@ -#extension 
GL_EXT_shader_16bit_storage: require -#extension GL_EXT_shader_8bit_storage: require -#extension GL_EXT_shader_explicit_arithmetic_types_float16: require -#extension GL_EXT_shader_explicit_arithmetic_types_int8: require -#extension GL_EXT_shader_explicit_arithmetic_types_int16: require -#extension GL_EXT_shader_explicit_arithmetic_types_int64: require -#extension GL_EXT_control_flow_attributes: enable -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_EXT_debug_printf : enable - -#define QK4_0 32 -#define QK4_1 32 - -#define GELU_COEF_A 0.044715 -#define SQRT_2_OVER_PI 0.79788456080286535587989211986876 -#define TWOPI_F 6.283185307179586f - -#define QK_K 256 -#define K_SCALE_SIZE 12 - -#define u8BufToU16(buf, idx) (((uint16_t(buf[idx + 1]) << 8)) | buf[idx]) -#define u8BufToFloat16(buf, idx) uint16BitsToHalf u8BufToU16(buf, idx) -#define u8BufToU32(buf, idx) (((uint32_t u8BufToU16(buf, idx + 2) << 8 | buf[idx + 1]) << 8) | buf[idx]) -#define u8BufToFloat(buf, idx) uintBitsToFloat u8BufToU32(buf, idx) - -#define sizeof_block_q4_0 0x12 -struct block_q4_0 { - float16_t d; - uint8_t qs[QK4_0 / 2]; -}; -mat4 dequantize_q4_0(const block_q4_0 xb, uint il) { - const float d1 = il != 0 ? (xb.d / 16.f) : xb.d; - const float d2 = d1 / 256.f; - const float md = -8.f * xb.d; - const uint16_t mask0 = il != 0 ? uint16_t(0x00F0) : uint16_t(0x000F); - const uint16_t mask1 = mask0 << 8; - - mat4 reg; - for (int i=0;i<8;i++) { - uint16_t b = (uint16_t(xb.qs[2 * i + 1]) << 8) | uint16_t(xb.qs[2 * i]); - reg[i/2][2*(i%2)+0] = d1 * (b & mask0) + md; - reg[i/2][2*(i%2)+1] = d2 * (b & mask1) + md; - } - return reg; -} - -#define sizeof_block_q4_1 0x14 -struct block_q4_1 { - float16_t d; - float16_t m; - uint8_t qs[QK4_1 / 2]; -}; -mat4 dequantize_q4_1(const block_q4_1 xb, uint il) { - const float d1 = il != 0 ? (xb.d / 16.f) : xb.d; - const float d2 = d1 / 256.f; - const float m = xb.m; - const uint16_t mask0 = il != 0 ? 
uint16_t(0x00F0) : uint16_t(0x000F); - const uint16_t mask1 = mask0 << 8; - - mat4 reg; - for (int i=0;i<8;i++) { - uint16_t b = (uint16_t(xb.qs[2 * i + 1]) << 8) | uint16_t(xb.qs[2 * i]); - reg[i/2][2*(i%2)+0] = ((b & mask0) * d1) + m; - reg[i/2][2*(i%2)+1] = ((b & mask1) * d2) + m; - } - return reg; -} - -#define sizeof_block_q4_k 144 -struct block_q4_k { - float16_t d; - float16_t dmin; - uint8_t scales[K_SCALE_SIZE]; - uint8_t qs[QK_K/2]; -}; - -#define sizeof_block_q6_k 210 -struct block_q6_k { - uint8_t ql[QK_K/2]; // quants, lower 4 bits - uint8_t qh[QK_K/4]; // quants, upper 2 bits - int8_t scales[QK_K/16]; // scales, quantized with 8 bits - float16_t d; // super-block scale -}; -mat4 dequantize_q6_k(const block_q6_k xb, uint il) { - const float16_t d_all = xb.d; - - const uint qlIndex = 64*(il/8) + 32*((il/2)&1) + 16*(il&1); - const uint qhIndex = 32*(il/8) + 16*(il&1); - float16_t sc = xb.scales[(il%2) + 2 * ((il/2))]; - il = (il/2) & 3; - - const uint16_t kmask1 = il>1 ? uint16_t(il>2 ? 192 : 48) : uint16_t(il>0 ? 12 : 3); - const uint16_t kmask2 = il>1 ? uint8_t(0xF0) : uint8_t(0x0F); - const float16_t coef = il>1 ? float16_t(1.f/16.f) : float16_t(1.f); - const float16_t ml = float16_t(d_all * sc * 32.f); - const float16_t dl = float16_t(d_all * sc * coef); - mat4 reg; - for (int i = 0; i < 16; ++i) { - const float16_t q = (il&1) != 0 ? 
((xb.ql[qlIndex + i] & kmask2) | ((xb.qh[qhIndex + i] & kmask1) << 2)) - : ((xb.ql[qlIndex + i] & kmask2) | ((xb.qh[qhIndex + i] & kmask1) << 4)); - reg[i/4][i%4] = dl * q - ml; - } - return reg; -} - - -#define QK8_0 32 -// struct block_q8_0 { -// float16_t d; // delta -// int8_t qs[QK8_0]; // quants -// }; -#define sizeof_block_q8_0 34 diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_add.comp b/ggml/src/ggml-kompute/kompute-shaders/op_add.comp deleted file mode 100644 index b7b76a79dbdbe..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_add.comp +++ /dev/null @@ -1,58 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1024) in; - -layout(binding = 0) buffer restrict readonly tensorInA { float inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { float inB[]; }; -layout(binding = 2) buffer restrict writeonly tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int nb00; - int nb01; - int nb02; - int nb03; - int ne10; - int ne11; - int ne12; - int ne13; - int nb10; - int nb11; - int nb12; - int nb13; - int ne0; - int nb0; - int nb1; - int nb2; - int nb3; - //int offs; // TODO: needed for GGML_OP_ACC, see metal code -} pcs; - -// general-purpose kernel for addition of two tensors -// pros: works for non-contiguous tensors, supports broadcast across dims 1, 2 and 3 -// cons: not very efficient -void main() { - const uint i03 = gl_WorkGroupID.z; - const uint i02 = gl_WorkGroupID.y; - const uint i01 = gl_WorkGroupID.x; - - const uint i13 = i03 % pcs.ne13; - const uint i12 = i02 % pcs.ne12; - const uint i11 = i01 % pcs.ne11; - - int offs = 0; // TMP (see above) - - uint src0_off = uint((i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01 + offs) / 4); - uint src1_off = uint((i13*pcs.nb13 + i12*pcs.nb12 + i11*pcs.nb11 ) / 4); - uint dst_off = uint((i03*pcs.nb3 + i02*pcs.nb2 + i01*pcs.nb1 + offs) / 4); - - for (uint i0 = gl_LocalInvocationID.x; 
i0 < pcs.ne0; i0 += gl_WorkGroupSize.x) { - const uint i10 = i0 % pcs.ne10; - out_[pcs.outOff + dst_off + i0] = inA[pcs.inAOff + src0_off + i0] + inB[pcs.inBOff + src1_off + i10]; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp b/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp deleted file mode 100644 index 2376a6b8f036f..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_addrow.comp +++ /dev/null @@ -1,25 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout(binding = 0) buffer restrict readonly tensorInA { float inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { float inB[]; }; -layout(binding = 2) buffer restrict writeonly tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inAOff; - uint inBOff; - uint outOff; - uint row; -} pcs; - -void main() { - const uint baseIndex = gl_WorkGroupID.x * 4; - - for (uint x = 0; x < 4; x++) { - const uint i = baseIndex + x; - out_[i + pcs.outOff] = inA[i + pcs.inAOff] + inB[(i % pcs.row) + pcs.inBOff]; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp b/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp deleted file mode 100644 index d57247d2dcc24..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f16.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "common.comp" - -#define IN_TYPE float16_t -#define IN_TYPE_SIZE 2 -#define OUT_TYPE float16_t -#define OUT_TYPE_SIZE 2 - -layout(local_size_x = 1024) in; - -layout (binding = 0) readonly buffer tensorIn { IN_TYPE in_[]; }; -layout (binding = 1) writeonly buffer tensorOut { OUT_TYPE out_[]; }; - -layout (push_constant) uniform parameter { - uint inOff; - uint outOff; - int ne00; - int ne01; - int ne02; - uint nb00; - uint nb01; - uint nb02; - uint nb03; - int ne0; - int ne1; - int ne2; - uint nb0; - uint nb1; - uint nb2; - uint nb3; -} pcs; - -void main() { - const uint i03 = 
gl_WorkGroupID.z; - const uint i02 = gl_WorkGroupID.y; - const uint i01 = gl_WorkGroupID.x; - - const int n = int(i03)*pcs.ne02*pcs.ne01*pcs.ne00 + int(i02)*pcs.ne01*pcs.ne00 + int(i01)*pcs.ne00; - - const int i3 = n / (pcs.ne2*pcs.ne1*pcs.ne0); - const int i2 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0) / (pcs.ne1*pcs.ne0); - const int i1 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0) / pcs.ne0; - const int i0 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0 - i1*pcs.ne0); - - const uint dst_data = (i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / OUT_TYPE_SIZE + pcs.outOff; // Based from out_ - - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - const uint src = uint((i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01 + i00*pcs.nb00) / IN_TYPE_SIZE) + pcs.inOff; // Based from in_ - out_[dst_data+i00] = OUT_TYPE(in_[src]); - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp b/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp deleted file mode 100644 index b568bcd7b2665..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f16_f32.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "common.comp" - -#define IN_TYPE float16_t -#define IN_TYPE_SIZE 2 -#define OUT_TYPE float -#define OUT_TYPE_SIZE 4 - -layout(local_size_x = 1024) in; - -layout (binding = 0) readonly buffer tensorIn { IN_TYPE in_[]; }; -layout (binding = 1) writeonly buffer tensorOut { OUT_TYPE out_[]; }; - -layout (push_constant) uniform parameter { - uint inOff; - uint outOff; - int ne00; - int ne01; - int ne02; - uint nb00; - uint nb01; - uint nb02; - uint nb03; - int ne0; - int ne1; - int ne2; - uint nb0; - uint nb1; - uint nb2; - uint nb3; -} pcs; - -void main() { - const uint i03 = gl_WorkGroupID.z; - const uint i02 = gl_WorkGroupID.y; - const uint i01 = gl_WorkGroupID.x; - - const int n = int(i03)*pcs.ne02*pcs.ne01*pcs.ne00 + int(i02)*pcs.ne01*pcs.ne00 + int(i01)*pcs.ne00; - - const int i3 = n / 
(pcs.ne2*pcs.ne1*pcs.ne0); - const int i2 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0) / (pcs.ne1*pcs.ne0); - const int i1 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0) / pcs.ne0; - const int i0 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0 - i1*pcs.ne0); - - const uint dst_data = (i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / OUT_TYPE_SIZE + pcs.outOff; // Based from out_ - - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - const uint src = uint((i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01 + i00*pcs.nb00) / IN_TYPE_SIZE) + pcs.inOff; // Based from in_ - out_[dst_data+i00] = OUT_TYPE(in_[src]); - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp b/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp deleted file mode 100644 index 99b22834308e5..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f16.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "common.comp" - -#define IN_TYPE float -#define IN_TYPE_SIZE 4 -#define OUT_TYPE float16_t -#define OUT_TYPE_SIZE 2 - -layout(local_size_x = 1024) in; - -layout (binding = 0) readonly buffer tensorIn { IN_TYPE in_[]; }; -layout (binding = 1) writeonly buffer tensorOut { OUT_TYPE out_[]; }; - -layout (push_constant) uniform parameter { - uint inOff; - uint outOff; - int ne00; - int ne01; - int ne02; - uint nb00; - uint nb01; - uint nb02; - uint nb03; - int ne0; - int ne1; - int ne2; - uint nb0; - uint nb1; - uint nb2; - uint nb3; -} pcs; - -void main() { - const uint i03 = gl_WorkGroupID.z; - const uint i02 = gl_WorkGroupID.y; - const uint i01 = gl_WorkGroupID.x; - - const int n = int(i03)*pcs.ne02*pcs.ne01*pcs.ne00 + int(i02)*pcs.ne01*pcs.ne00 + int(i01)*pcs.ne00; - - const int i3 = n / (pcs.ne2*pcs.ne1*pcs.ne0); - const int i2 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0) / (pcs.ne1*pcs.ne0); - const int i1 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0) / pcs.ne0; - const int i0 = (n - 
i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0 - i1*pcs.ne0); - - const uint dst_data = (i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / OUT_TYPE_SIZE + pcs.outOff; // Based from out_ - - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - const uint src = uint((i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01 + i00*pcs.nb00) / IN_TYPE_SIZE) + pcs.inOff; // Based from in_ - out_[dst_data+i00] = OUT_TYPE(in_[src]); - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp b/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp deleted file mode 100644 index 2fc998492b7f8..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_cpy_f32_f32.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "common.comp" - -#define IN_TYPE float -#define IN_TYPE_SIZE 4 -#define OUT_TYPE float -#define OUT_TYPE_SIZE 4 - -layout(local_size_x = 1024) in; - -layout (binding = 0) readonly buffer tensorIn { IN_TYPE in_[]; }; -layout (binding = 1) writeonly buffer tensorOut { OUT_TYPE out_[]; }; - -layout (push_constant) uniform parameter { - uint inOff; - uint outOff; - int ne00; - int ne01; - int ne02; - uint nb00; - uint nb01; - uint nb02; - uint nb03; - int ne0; - int ne1; - int ne2; - uint nb0; - uint nb1; - uint nb2; - uint nb3; -} pcs; - -void main() { - const uint i03 = gl_WorkGroupID.z; - const uint i02 = gl_WorkGroupID.y; - const uint i01 = gl_WorkGroupID.x; - - const int n = int(i03)*pcs.ne02*pcs.ne01*pcs.ne00 + int(i02)*pcs.ne01*pcs.ne00 + int(i01)*pcs.ne00; - - const int i3 = n / (pcs.ne2*pcs.ne1*pcs.ne0); - const int i2 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0) / (pcs.ne1*pcs.ne0); - const int i1 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0) / pcs.ne0; - const int i0 = (n - i3*pcs.ne2*pcs.ne1*pcs.ne0 - i2*pcs.ne1*pcs.ne0 - i1*pcs.ne0); - - const uint dst_data = (i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / OUT_TYPE_SIZE + pcs.outOff; // Based from out_ - - for (uint i00 = 
gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - const uint src = uint((i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01 + i00*pcs.nb00) / IN_TYPE_SIZE) + pcs.inOff; // Based from in_ - out_[dst_data+i00] = OUT_TYPE(in_[src]); - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp b/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp deleted file mode 100644 index 291c3fc1897ab..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_diagmask.comp +++ /dev/null @@ -1,30 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; - uint n_past; - int ne00; - int ne01; -} pcs; - -void main() { - const uint i02 = gl_WorkGroupID.z; - const uint i01 = gl_WorkGroupID.y; - const uint i00 = gl_WorkGroupID.x; - - const uint index = i02*pcs.ne01*pcs.ne00 + i01*pcs.ne00 + i00; - - if (i00 > pcs.n_past + i01) { - out_[index + pcs.outOff] = uintBitsToFloat(0xFF800000); - } else { - out_[index + pcs.outOff] = in_[index + pcs.inOff]; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp b/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp deleted file mode 100644 index 9d8c53710afbf..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_gelu.comp +++ /dev/null @@ -1,22 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; }; -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; -} pcs; - -void main() { - const uint baseIndex = gl_WorkGroupID.x * 8; - - for (uint x = 0; x < 8; x++) { - const uint i = baseIndex + x; - const float y = in_[i + pcs.inOff]; - out_[i + pcs.outOff] = 
0.5*y*(1.0 + tanh(clamp(SQRT_2_OVER_PI*y*(1.0 + GELU_COEF_A*y*y), -15.0, 15.0))); - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp b/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp deleted file mode 100644 index 1a5581b23a9db..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_getrows.comp +++ /dev/null @@ -1,17 +0,0 @@ -void main() { - const uint i = gl_WorkGroupID.x; - const int r = inB[i + pcs.inBOff]; - - int z = 0; - for (uint ind = gl_LocalInvocationID.x; ind < pcs.ne00/16; ind += gl_WorkGroupSize.x) { - const uint inIndex = (r * pcs.nb01 + pcs.inAOff) + ind/NL * SIZE_OF_BLOCK; - const mat4 result = dequantize_block(inIndex, ind%NL); - for (uint j = 0; j < 4; ++j) { - for (uint k = 0; k < 4; ++k) { - const uint outIndex = i * pcs.nb1/BYTES_FOR_TYPE + pcs.outOff + z; - out_[outIndex] = result[j][k]; - ++z; - } - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp b/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp deleted file mode 100644 index 48c9361081138..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f16.comp +++ /dev/null @@ -1,31 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout (binding = 0) readonly buffer tensorInA { float16_t inA[]; }; -layout (binding = 1) readonly buffer tensorInB { int inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int nb01; - int nb1; -} pcs; - -void dequantize_row_f16(uint x /*Based from inA unaligned*/, uint y /*Based from out_*/, int k) { - for (int j = 0; j < k; j++) { - out_[y + j] = inA[x + j]; - } -} - -void main() { - const uint i = gl_WorkGroupID.x; - const int r = inB[i + pcs.inBOff]; - - dequantize_row_f16(r*pcs.nb01/2/*bytes for float16*/ + pcs.inAOff, i*pcs.nb1/4 + pcs.outOff, pcs.ne00); -} diff --git 
a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp b/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp deleted file mode 100644 index 9d7acdaf8a8e4..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_f32.comp +++ /dev/null @@ -1,31 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout (binding = 0) readonly buffer tensorInA { float inA[]; }; -layout (binding = 1) readonly buffer tensorInB { int inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int nb01; - int nb1; -} pcs; - -void dequantize_row_f32(uint x /*Based from inA unaligned*/, uint y /*Based from out_*/, int k) { - for (int j = 0; j < k; j++) { - out_[y + j] = inA[x + j]; - } -} - -void main() { - const uint i = gl_WorkGroupID.x; - const int r = inB[i + pcs.inBOff]; - - dequantize_row_f32(r*pcs.nb01/4 + pcs.inAOff, i*pcs.nb1/4 + pcs.outOff, pcs.ne00); -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp b/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp deleted file mode 100644 index 32b2e891e8fcd..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_0.comp +++ /dev/null @@ -1,38 +0,0 @@ -#version 450 - -#include "common.comp" - -#define NL 2 -#define BYTES_FOR_TYPE 4 /*bytes for float*/ -#define SIZE_OF_BLOCK sizeof_block_q4_0 - -layout(local_size_x = 1) in; - -layout (binding = 0) readonly buffer tensorInA { uint8_t inA[]; }; -layout (binding = 1) readonly buffer tensorInB { int inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int nb01; - int nb1; -} pcs; - -block_q4_0 get_unaligned_block_q4_0(uint index) { - block_q4_0 fres; - fres.d = u8BufToFloat16(inA, index); - [[unroll]] for (uint it = 0; it != QK4_0 / 2; it++) { - 
fres.qs[it] = inA[index+2+it]; - } - return fres; -} - -mat4 dequantize_block(uint index, uint il) { - const block_q4_0 block = get_unaligned_block_q4_0(index); - return dequantize_q4_0(block, il); -} - -#include "op_getrows.comp" diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp b/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp deleted file mode 100644 index 87f2fbe17bb3a..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q4_1.comp +++ /dev/null @@ -1,39 +0,0 @@ -#version 450 - -#include "common.comp" - -#define NL 2 -#define BYTES_FOR_TYPE 4 /*bytes for float*/ -#define SIZE_OF_BLOCK sizeof_block_q4_1 - -layout(local_size_x = 1) in; - -layout (binding = 0) readonly buffer tensorInA { uint8_t inA[]; }; -layout (binding = 1) readonly buffer tensorInB { int inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int nb01; - int nb1; -} pcs; - -block_q4_1 get_unaligned_block_q4_1(uint index) { - block_q4_1 fres; - fres.d = u8BufToFloat16(inA, index); - fres.m = u8BufToFloat16(inA, index+2); - [[unroll]] for (uint it = 0; it != QK4_1 / 2; it++) { - fres.qs[it] = inA[index+4+it]; - } - return fres; -} - -mat4 dequantize_block(uint index, uint il) { - const block_q4_1 block = get_unaligned_block_q4_1(index); - return dequantize_q4_1(block, il); -} - -#include "op_getrows.comp" diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp b/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp deleted file mode 100644 index 9ce3545d1ecf4..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_getrows_q6_k.comp +++ /dev/null @@ -1,44 +0,0 @@ -#version 450 - -#include "common.comp" - -#define NL 16 -#define BYTES_FOR_TYPE 4 /*bytes for float*/ -#define SIZE_OF_BLOCK sizeof_block_q6_k - -layout(local_size_x = 1) in; - -layout (binding = 0) readonly buffer tensorInA { uint8_t 
inA[]; }; -layout (binding = 1) readonly buffer tensorInB { int inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int nb01; - int nb1; -} pcs; - -block_q6_k get_unaligned_block_q6_k(uint index) { - block_q6_k fres; - [[unroll]] for (uint it = 0; it != QK_K / 2; it++) { - fres.ql[it] = inA[index + it]; - } - [[unroll]] for (uint it = 0; it != QK_K / 4; it++) { - fres.qh[it] = inA[index + QK_K/2 + it]; - } - [[unroll]] for (uint it = 0; it != QK_K / 16; it++) { - fres.scales[it] = int8_t(inA[index + QK_K/2 + QK_K/4 + it]); - } - fres.d = u8BufToFloat16(inA, index + QK_K/2 + QK_K/4 + QK_K/16); - return fres; -} - -mat4 dequantize_block(uint index, uint il) { - const block_q6_k block = get_unaligned_block_q6_k(index); - return dequantize_q6_k(block, il); -} - -#include "op_getrows.comp" diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp deleted file mode 100644 index c92647c4db1c8..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1024) in; - -layout(binding = 0) buffer restrict readonly tensorInA { float inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { float inB[]; }; -layout(binding = 2) buffer restrict writeonly tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int nb00; - int nb01; - int nb02; - int nb03; - int ne10; - int ne11; - int ne12; - int ne13; - int nb10; - int nb11; - int nb12; - int nb13; - int ne0; - int nb0; - int nb1; - int nb2; - int nb3; -} pcs; - -void main() { - const uint i03 = gl_WorkGroupID.z; - const uint i02 = gl_WorkGroupID.y; - const uint i01 = gl_WorkGroupID.x; - - const uint i13 = i03 % pcs.ne13; - const uint i12 = i02 % 
pcs.ne12; - const uint i11 = i01 % pcs.ne11; - - uint src0_off = uint((i03*pcs.nb03 + i02*pcs.nb02 + i01*pcs.nb01) / 4); - uint src1_off = uint((i13*pcs.nb13 + i12*pcs.nb12 + i11*pcs.nb11) / 4); - uint dst_off = uint((i03*pcs.nb3 + i02*pcs.nb2 + i01*pcs.nb1) / 4); - - for (uint i0 = gl_LocalInvocationID.x; i0 < pcs.ne0; i0 += gl_WorkGroupSize.x) { - const uint i10 = i0 % pcs.ne10; - out_[pcs.outOff + dst_off + i0] = inA[pcs.inAOff + src0_off + i0] * inB[pcs.inBOff + src1_off + i10]; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp deleted file mode 100644 index 0ab1b2fc20eeb..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_f16.comp +++ /dev/null @@ -1,69 +0,0 @@ -#version 450 - -#include "common.comp" - -#extension GL_KHR_shader_subgroup_arithmetic : require - -layout(local_size_x_id = 0) in; - -layout (binding = 0) readonly buffer tensorInA { float16_t inA[]; }; -layout (binding = 1) readonly buffer tensorInB { float inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int ne01; - int ne02; - uint nb00; - uint nb01; - uint nb02; - uint nb03; - int ne10; - int ne11; - int ne12; - uint nb10; - uint nb11; - uint nb12; - uint nb13; - int ne0; - int ne1; - uint r2; - uint r3; -} pcs; - -#define N_F16_F32 4 - -void main() { - const uint r0 = gl_WorkGroupID.x; - const uint rb = gl_WorkGroupID.y*N_F16_F32; - const uint im = gl_WorkGroupID.z; - - const uint i12 = im%pcs.ne12; - const uint i13 = im/pcs.ne12; - - const uint offset0 = r0*pcs.nb01 + (i12/pcs.r2)*pcs.nb02 + (i13/pcs.r3)*pcs.nb03; - - const uint x = offset0 / 2 + pcs.inAOff; // Based from inA - - for (uint row = 0; row < N_F16_F32; ++row) { - uint r1 = rb + row; - if (r1 >= pcs.ne11) { - break; - } - - const uint y = (r1*pcs.nb11 + i12*pcs.nb12 + i13*pcs.nb13) / 4 + 
pcs.inBOff; - - float sumf = 0; - for (uint i = gl_SubgroupInvocationID.x; i < pcs.ne00; i += gl_SubgroupSize) { - sumf += float(inA[x+i]) * float(inB[y+i]); - } - - const float all_sum = subgroupAdd(sumf); - if (subgroupElect()) { - out_[im*pcs.ne1*pcs.ne0 + r1*pcs.ne0 + r0 + pcs.outOff] = all_sum; - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp deleted file mode 100644 index d1ca4ad6c2528..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_mat_f32.comp +++ /dev/null @@ -1,51 +0,0 @@ -#version 450 - -#include "common.comp" - -#extension GL_KHR_shader_subgroup_arithmetic : require -#extension GL_EXT_debug_printf : enable - -// device subgroup size -layout (local_size_x_id = 0) in; - -layout(binding = 0) readonly buffer tensorInA { float inA[]; }; -layout(binding = 1) readonly buffer tensorInB { float inB[]; }; -layout(binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout(push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int ne01; - int ne02; - int ne11; - int ne12; - uint nb01; - uint nb02; - uint nb11; - uint nb12; - uint nb1; - uint nb2; -} -pcs; - - -void main() { - uvec3 gid = gl_WorkGroupID; - - uint bc_ab = pcs.ne12 > pcs.ne02 ? gid.z / (pcs.ne12 / pcs.ne02) : gid.z; - uint bc_ba = pcs.ne02 > pcs.ne12 ? 
gid.z / (pcs.ne02 / pcs.ne12) : gid.z; - - const uint x = (gid.x*pcs.nb01 + bc_ab*pcs.nb02) / 4 + pcs.inAOff; // Based from inA - const uint y = (gid.y*pcs.nb11 + bc_ba*pcs.nb12) / 4 + pcs.inBOff; // based from inB - float sum = 0.0f; - for (uint i = gl_SubgroupInvocationID.x; i < pcs.ne00; i += gl_SubgroupSize) { - sum += float(inA[x+i]) * float(inB[y+i]); - } - - const float all_sum = subgroupAdd(sum); - if (subgroupElect()) { - out_[gid.z*(pcs.nb2/4) + gid.y*(pcs.nb1/4) + gid.x + pcs.outOff] = all_sum; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp deleted file mode 100644 index b0cea8bbe67b9..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_0.comp +++ /dev/null @@ -1,33 +0,0 @@ -#version 450 - -#include "common.comp" - -#define BLOCKS_IN_QUANT QK4_0 -#define SIZE_OF_BLOCK sizeof_block_q4_0 -#define N_ROWS 4 - -#include "op_mul_mv_q_n_pre.comp" - -// The q4_0 version of this function -float block_q_n_dot_y(uint block_index, uint yb, uint il) { - vec2 acc = vec2(0.0, 0.0); - const uint index = (block_index) * SIZE_OF_BLOCK + pcs.inAOff; - float d = float(u8BufToFloat16(inA, index)); - float sumy = 0.0f; - for (int i = 0; i < BLOCKS_IN_QUANT/4; i+=2) { - const uint16_t b = u8BufToU16(inA, index + 2 + il + i); - - const float yl0 = inB[yb + i]; - const float yl1 = inB[yb + i + 1]; - const float yl8 = inB[yb + i + BLOCKS_IN_QUANT/2]; - const float yl9 = inB[yb + i + BLOCKS_IN_QUANT/2 + 1]; - - sumy += yl0 + yl1 + yl8 + yl9; - - acc[0] += yl0 * (b & 0x000F) + yl1 / 256.f * (b & 0x0F00); - acc[1] += yl8 / 16.f * (b & 0x00F0) + yl9 / 4096.f * (b & 0xF000); - } - return d * (sumy * -8.f + acc[0] + acc[1]); -} - -#include "op_mul_mv_q_n.comp" diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp deleted file mode 100644 index 8582c61a3beb9..0000000000000 --- 
a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_1.comp +++ /dev/null @@ -1,35 +0,0 @@ -#version 450 - -#include "common.comp" - -#define BLOCKS_IN_QUANT QK4_1 -#define SIZE_OF_BLOCK sizeof_block_q4_1 -#define N_ROWS 4 - -#include "op_mul_mv_q_n_pre.comp" - -// The q4_1 version of this function -float block_q_n_dot_y(uint block_index, uint yb, uint il) { - vec2 acc = vec2(0.0, 0.0); - const uint index = (block_index) * SIZE_OF_BLOCK + pcs.inAOff; - float d = float(u8BufToFloat16(inA, index)); - float m = float(u8BufToFloat16(inA, index+2)); - - float sumy = 0.0f; - for (int i = 0; i < BLOCKS_IN_QUANT/4; i+=2) { - const uint16_t b = u8BufToU16(inA, index + 4 + il + i); - - const float yl0 = inB[yb + i]; - const float yl1 = inB[yb + i + 1]; - const float yl8 = inB[yb + i + BLOCKS_IN_QUANT/2]; - const float yl9 = inB[yb + i + BLOCKS_IN_QUANT/2 + 1]; - - sumy += yl0 + yl1 + yl8 + yl9; - - acc[0] += yl0 * (b & 0x000F) + yl1 / 256.f * (b & 0x0F00); - acc[1] += yl8 / 16.f * (b & 0x00F0) + yl9 / 4096.f * (b & 0xF000); - } - return d * (acc[0] + acc[1]) + sumy * m; -} - -#include "op_mul_mv_q_n.comp" diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp deleted file mode 100644 index a5752a3a0065f..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q4_k.comp +++ /dev/null @@ -1,140 +0,0 @@ -#version 450 - -#include "common.comp" - -#define N_DST 4 -#define SIZE_OF_BLOCK sizeof_block_q4_k - -layout(local_size_x = 4) in; -layout(local_size_y = 8) in; -layout(local_size_z = 1) in; - -layout (binding = 0) readonly buffer tensorInA { block_q4_k inA[]; }; -layout (binding = 1) readonly buffer tensorInB { float inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int ne10; - int ne0; - int ne1; - int ne01; - int ne02; - int ne12; - uint nb01; - uint 
nb02; - uint nb03; - uint nb11; - uint nb12; - uint nb13; - uint r2; - uint r3; -} pcs; - -void main() { - const uint16_t kmask1 = uint16_t(0x3f3f); - const uint16_t kmask2 = uint16_t(0x0f0f); - const uint16_t kmask3 = uint16_t(0xc0c0); - - const uint ix = gl_SubgroupInvocationID/8; // 0...3 - const uint it = gl_SubgroupInvocationID%8; // 0...7 - const uint iq = it/4; // 0 or 1 - const uint ir = it%4; // 0...3 - - const uint nb = pcs.ne00/QK_K; - - const uint r0 = gl_WorkGroupID.x; - const uint r1 = gl_WorkGroupID.y; - const uint im = gl_WorkGroupID.z; - - const uint first_row = r0 * N_DST; - const uint ib_row = first_row * nb; - - const uint i12 = im%pcs.ne12; - const uint i13 = im/pcs.ne12; - - const uint offset0 = first_row*(pcs.nb01/SIZE_OF_BLOCK) + (i12/pcs.r2)*(pcs.nb02/SIZE_OF_BLOCK) + (i13/pcs.r3)*(pcs.nb03/SIZE_OF_BLOCK); - const uint offset1 = r1*pcs.nb11 + (i12 )*pcs.nb12 + (i13 )*pcs.nb13; - - const uint xblk = offset0 + pcs.inAOff; - const uint y = (offset1 / 4) + pcs.inBOff; - - float yl[16]; - float yh[16]; - float sumf[N_DST] = {0.f, 0.f, 0.f, 0.f}; - float all_sum = 0.f; - - uint y4 = y + ix * QK_K + 64 * iq + 8 * ir; - - for (uint ib = ix; ib < nb; ib += 4) { - const uint blk_idx = ib + xblk; - - float sumy[4] = {0.f, 0.f, 0.f, 0.f}; - for (int i = 0; i < 8; ++i) { - yl[i+0] = inB[y4+i+ 0]; sumy[0] += yl[i+0]; - yl[i+8] = inB[y4+i+ 32]; sumy[1] += yl[i+8]; - yh[i+0] = inB[y4+i+128]; sumy[2] += yh[i+0]; - yh[i+8] = inB[y4+i+160]; sumy[3] += yh[i+8]; - } - - for (int row = 0; row < N_DST; row++) { - uint row_idx = row * (pcs.nb01 / SIZE_OF_BLOCK); - - uint16_t sc_0 = u8BufToU16(inA[blk_idx + row_idx].scales, iq * 2 + 0); - uint16_t sc_1 = u8BufToU16(inA[blk_idx + row_idx].scales, iq * 2 + 2); - uint16_t sc_2 = u8BufToU16(inA[blk_idx + row_idx].scales, iq * 2 + 4); - uint16_t sc_3 = u8BufToU16(inA[blk_idx + row_idx].scales, iq * 2 + 6); - uint16_t sc_4 = u8BufToU16(inA[blk_idx + row_idx].scales, iq * 2 + 8); - - uint16_t sc16[4]; - sc16[0] = sc_0 & 
kmask1; - sc16[1] = sc_2 & kmask1; - sc16[2] = ((sc_4 >> 0) & kmask2) | ((sc_0 & kmask3) >> 2); - sc16[3] = ((sc_4 >> 4) & kmask2) | ((sc_2 & kmask3) >> 2); - - float acc1[4] = {0.f, 0.f, 0.f, 0.f}; - float acc2[4] = {0.f, 0.f, 0.f, 0.f}; - for (int i = 0; i < 8; i += 2) { - uint16_t q1 = u8BufToU16(inA[blk_idx + row_idx].qs, 32 * iq + 8 * ir + i); - uint16_t q2 = u8BufToU16(inA[blk_idx + row_idx].qs, 64 + 32 * iq + 8 * ir + i); - acc1[0] += yl[i+0] * (q1 & 0x000F); - acc1[1] += yl[i+1] * (q1 & 0x0F00); - acc1[2] += yl[i+8] * (q1 & 0x00F0); - acc1[3] += yl[i+9] * (q1 & 0xF000); - acc2[0] += yh[i+0] * (q2 & 0x000F); - acc2[1] += yh[i+1] * (q2 & 0x0F00); - acc2[2] += yh[i+8] * (q2 & 0x00F0); - acc2[3] += yh[i+9] * (q2 & 0xF000); - } - - uint8_t sc8_0 = uint8_t(sc16[0] & 0xFF); - uint8_t sc8_1 = uint8_t(sc16[0] >> 8 ); - uint8_t sc8_2 = uint8_t(sc16[1] & 0xFF); - uint8_t sc8_3 = uint8_t(sc16[1] >> 8 ); - uint8_t sc8_4 = uint8_t(sc16[2] & 0xFF); - uint8_t sc8_5 = uint8_t(sc16[2] >> 8 ); - uint8_t sc8_6 = uint8_t(sc16[3] & 0xFF); - uint8_t sc8_7 = uint8_t(sc16[3] >> 8 ); - - float dall = float(inA[blk_idx + row_idx].d); - float dmin = float(inA[blk_idx + row_idx].dmin); - sumf[row] += dall * ((acc1[0] + 1.f/256.f * acc1[1]) * sc8_0 + - (acc1[2] + 1.f/256.f * acc1[3]) * sc8_1 * 1.f/16.f + - (acc2[0] + 1.f/256.f * acc2[1]) * sc8_4 + - (acc2[2] + 1.f/256.f * acc2[3]) * sc8_5 * 1.f/16.f) - - dmin * (sumy[0] * sc8_2 + sumy[1] * sc8_3 + sumy[2] * sc8_6 + sumy[3] * sc8_7); - } - - y4 += 4 * QK_K; - } - - for (int row = 0; row < N_DST; ++row) { - all_sum = subgroupAdd(sumf[row]); - if (subgroupElect()) { - out_[r1*pcs.ne0 + im*pcs.ne0*pcs.ne1 + first_row + row + pcs.outOff] = all_sum; - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp deleted file mode 100644 index d331d1a70572e..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q6_k.comp +++ /dev/null @@ -1,106 +0,0 
@@ -#version 450 - -#include "common.comp" - -#define SIZE_OF_BLOCK sizeof_block_q6_k - -layout(local_size_x_id = 0) in; -layout(local_size_y_id = 1) in; -layout(local_size_z = 1) in; - -layout (binding = 0) readonly buffer tensorInA { uint8_t inA[]; }; -layout (binding = 1) readonly buffer tensorInB { float inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int ne10; - int ne0; - int ne1; - int ne01; - int ne02; - int ne12; - uint nb01; - uint nb02; - uint nb03; - uint nb11; - uint nb12; - uint nb13; - uint r2; - uint r3; -} pcs; - -void main() { - const uint8_t kmask1 = uint8_t(0x03); - const uint8_t kmask2 = uint8_t(0x0C); - const uint8_t kmask3 = uint8_t(0x30); - const uint8_t kmask4 = uint8_t(0xC0); - - const uint nb = pcs.ne00/QK_K; - - const uint r0 = gl_WorkGroupID.x; - const uint r1 = gl_WorkGroupID.y; - const uint im = gl_WorkGroupID.z; - - const uint row = (r0 * gl_NumSubgroups + gl_SubgroupID); - - const uint i12 = im%pcs.ne12; - const uint i13 = im/pcs.ne12; - - const uint x = row*(pcs.nb01/SIZE_OF_BLOCK) + (i12/pcs.r2)*(pcs.nb02/SIZE_OF_BLOCK) + (i13/pcs.r3)*(pcs.nb03/SIZE_OF_BLOCK); - const uint yy = (r1*pcs.nb11 + i12*pcs.nb12 + i13*pcs.nb13) / 4 + pcs.inBOff; - - float sumf = 0; - - // bits of invocation ID for gl_SubgroupSize=32: - // x x x x x - // 4 3 2 1 0 - // ( tid ) ix - // ip ( il ) - - const uint block_stride = gl_SubgroupSize / 16; // number of blocks each subgroup processes - const uint tid = gl_SubgroupInvocationID/block_stride; // first block_stride groups have tid=0 - const uint ix = gl_SubgroupInvocationID%block_stride; // first block is 0..block_stride-1 - const uint ip = tid/8; // first or second half of block (0 or 1) - const uint il = tid%8; // each half has 8 parts, one per scale - const uint n = 4; // 4 scales at a time (and 4 sums) - const uint l0 = n*il; // offset into half-block, 0..28 - const 
uint is = 8*ip + l0/16; // 0, 1, 8, 9 - - const uint y_offset = 128*ip + l0; - const uint q_offset_l = 64*ip + l0; - const uint q_offset_h = 32*ip + l0; - - for (uint i = ix; i < nb; i += block_stride) { - - const uint baseIndex = (x + i) * SIZE_OF_BLOCK + pcs.inAOff; - - const uint qlIndex = q_offset_l; - const uint q2Index = qlIndex + QK_K/8; - const uint qhIndex = q_offset_h; - const uint y = yy + i * QK_K + y_offset; - - float sums[4] = {0.0f, 0.0f, 0.0f, 0.0f}; - for (uint l = 0; l < n; ++l) { - const uint8_t currentQ1 = inA[baseIndex + qlIndex + l]; - const uint8_t currentQ2 = inA[baseIndex + q2Index + l]; - const uint8_t currentQh = inA[baseIndex + QK_K/2 + qhIndex + l]; - - sums[0] += inB[y+l+ 0] * (int8_t((currentQ1 & 0xF) | ((currentQh & kmask1) << 4)) - 32); - sums[1] += inB[y+l+32] * (int8_t((currentQ2 & 0xF) | ((currentQh & kmask2) << 2)) - 32); - sums[2] += inB[y+l+64] * (int8_t((currentQ1 >> 4) | ((currentQh & kmask3) << 0)) - 32); - sums[3] += inB[y+l+96] * (int8_t((currentQ2 >> 4) | ((currentQh & kmask4) >> 2)) - 32); - } - - float d = u8BufToFloat16(inA, baseIndex + QK_K/2 + QK_K/4 + QK_K/16); - sumf += d * (sums[0] * int8_t(inA[baseIndex + QK_K/2 + QK_K/4 + is]) + sums[1] * int8_t(inA[baseIndex + QK_K/2 + QK_K/4 + 2 + is]) + sums[2] * int8_t(inA[baseIndex + QK_K/2 + QK_K/4 + 4 + is]) + sums[3] * int8_t(inA[baseIndex + QK_K/2 + QK_K/4 + 6 + is])); - } - - const float tot = subgroupAdd(sumf); - if (subgroupElect()) { - out_[r1*pcs.ne0 + im*pcs.ne0*pcs.ne1 + row + pcs.outOff] = tot; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp deleted file mode 100644 index 34d015e90b84c..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mat_q8_0.comp +++ /dev/null @@ -1,73 +0,0 @@ -#version 450 - -#include "common.comp" - -#include "op_mul_mv_q_n_pre.comp" - -#define SIZE_OF_D 2 - -#define N_DST 4 // each SIMD group works on 4 rows -#define N_SIMDGROUP 2 // 
number of SIMD groups in a thread group -#define N_SIMDWIDTH 32 // assuming SIMD group size is 32 - -#define NB_Q8_0 8 - -void main() { - // NB: hack to make compatible with AMD GPUs that have a subgroup size of 64 - if (gl_SubgroupInvocationID > 31) - return; - - const int nr = N_DST; - const int nsg = N_SIMDGROUP; - const int nw = N_SIMDWIDTH; - - const int nb = pcs.ne00/QK8_0; - const uint r0 = gl_WorkGroupID.x; - const uint r1 = gl_WorkGroupID.y; - const uint im = gl_WorkGroupID.z; - - const uint first_row = (r0 * nsg + gl_SubgroupID) * nr; - - const uint i12 = im%pcs.ne12; - const uint i13 = im/pcs.ne12; - - const uint offset0 = first_row * nb + (i12/pcs.r2)*(nb*pcs.ne01) + (i13/pcs.r3)*(nb*pcs.ne01*pcs.ne02); - - const uint x = offset0*sizeof_block_q8_0 + pcs.inAOff; // Based from inA - const uint y = r1*pcs.ne10 + im*pcs.ne00*pcs.ne1 + pcs.inBOff; // based from inB - - float yl[NB_Q8_0]; - float sumf[N_DST]={0.f, 0.f, 0.f, 0.f}; - - const uint ix = gl_SubgroupInvocationID.x/4; - const uint il = gl_SubgroupInvocationID.x%4; - - uint yb = y + ix * QK8_0 + NB_Q8_0*il; - - // each thread in a SIMD group deals with NB_Q8_0 quants at a time - for (uint ib = ix; ib < nb; ib += nw/4) { - for (int i = 0; i < NB_Q8_0; ++i) { - yl[i] = inB[yb + i]; - } - - for (int row = 0; row < nr; row++) { - const uint block_offset = (ib+row*nb) * sizeof_block_q8_0; - float sumq = 0.f; - for (int iq = 0; iq < NB_Q8_0; ++iq) { - const int8_t qs_iq = int8_t(inA[x + block_offset + SIZE_OF_D + NB_Q8_0*il + iq]); - sumq += qs_iq * yl[iq]; - } - const float16_t d = u8BufToFloat16(inA, x + block_offset); - sumf[row] += sumq*d; - } - - yb += NB_Q8_0 * nw; - } - - for (int row = 0; row < nr; ++row) { - const float tot = subgroupAdd(sumf[row]); - if (subgroupElect() && first_row + row < pcs.ne01) { - out_[r1*pcs.ne0 + im*pcs.ne0*pcs.ne1 + first_row + row] = tot; - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp 
b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp deleted file mode 100644 index a6517cc1f1993..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n.comp +++ /dev/null @@ -1,52 +0,0 @@ -void main() { - // NB: hack to make compatible with AMD GPUs that have a subgroup size of 64 - if (gl_SubgroupInvocationID > 31) - return; - - const uint nb = uint(pcs.ne00/BLOCKS_IN_QUANT); - - const uint r0 = gl_WorkGroupID.x; - const uint r1 = gl_WorkGroupID.y; - const uint im = gl_WorkGroupID.z; - - const uint first_row = (r0 * gl_NumSubgroups + gl_SubgroupID) * N_ROWS; - - const uint i12 = im%pcs.ne12; - const uint i13 = im/pcs.ne12; - - // pointers to src0 rows - uint ax[N_ROWS]; - for (int row = 0; row < N_ROWS; ++row) { - const uint offset0 = (first_row + row)*(pcs.nb01/SIZE_OF_BLOCK) + (i12/pcs.r2)*(pcs.nb02/SIZE_OF_BLOCK) + (i13/pcs.r3)*(pcs.nb03/SIZE_OF_BLOCK); - - ax[row] = offset0 + pcs.inAOff; - } - - const uint y = (r1*pcs.nb11 + i12*pcs.nb12 + i13*pcs.nb13) / 4 + pcs.inBOff; - - float sumf[N_ROWS] = {0.0f, 0.0f, 0.0f, 0.0f}; - - const uint ix = gl_SubgroupInvocationID/2; - const uint il = (BLOCKS_IN_QUANT/4)*(gl_SubgroupInvocationID%2); - - uint yb = y + ix * BLOCKS_IN_QUANT + il; - - //debugPrintfEXT("gl_NumSubgroups=%d, gl_SubgroupID=%d, gl_SubgroupInvocationID=%d, glSubgroupSize=%d, gl_WorkGroupSize.x=%d, gl_WorkGroupSize.y=%d, gl_WorkGroupSize.z=%d\n", - // gl_NumSubgroups, gl_SubgroupID, gl_SubgroupInvocationID, gl_SubgroupSize, - // gl_WorkGroupSize.x, gl_WorkGroupSize.y, gl_WorkGroupSize.z); - - for (uint ib = ix; ib < nb; ib += 16) { - for (int row = 0; row < N_ROWS; row++) { - sumf[row] += block_q_n_dot_y(ax[row] + ib, yb, il); - } - - yb += BLOCKS_IN_QUANT * 16; - } - - for (int row = 0; row < N_ROWS; ++row) { - const float tot = subgroupAdd(sumf[row]); - if (first_row + row < pcs.ne01 && subgroupElect()) { - out_[r1*pcs.ne0 + im*pcs.ne0*pcs.ne1 + first_row + row + pcs.outOff] = tot; - } - } -} diff --git 
a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp b/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp deleted file mode 100644 index a9a2f22180ffd..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_mul_mv_q_n_pre.comp +++ /dev/null @@ -1,28 +0,0 @@ -layout(local_size_x_id = 0) in; -layout(local_size_y = 8) in; -layout(local_size_z = 1) in; - -layout (binding = 0) readonly buffer tensorInA { uint8_t inA[]; }; -layout (binding = 1) readonly buffer tensorInB { float inB[]; }; -layout (binding = 2) writeonly buffer tensorOut { float out_[]; }; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int ne01; - int ne02; - int ne10; - int ne12; - int ne0; - int ne1; - uint nb01; - uint nb02; - uint nb03; - uint nb11; - uint nb12; - uint nb13; - uint r2; - uint r3; -} pcs; diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp b/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp deleted file mode 100644 index ad0c3c01b9dd0..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_norm.comp +++ /dev/null @@ -1,84 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 256) in; - -layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; - uint ne00; - uint nb01; - float eps; -} pcs; - -shared float sum[gl_WorkGroupSize.x]; - -void main() { - const uint x = (gl_WorkGroupID.x*pcs.nb01/4) + pcs.inOff; // Based from in_ - // MEAN - // parallel sum - sum[gl_LocalInvocationID.x] = 0.0; - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - sum[gl_LocalInvocationID.x] += in_[x+i00]; - } - - // reduce - barrier(); - memoryBarrierShared(); - [[unroll]] for (uint i = gl_WorkGroupSize.x/2; i > 0; i /= 2) { - if (gl_LocalInvocationID.x < i) { - sum[gl_LocalInvocationID.x] += 
sum[gl_LocalInvocationID.x + i]; - } - barrier(); - memoryBarrierShared(); - } - - // broadcast - if (gl_LocalInvocationID.x == 0) { - sum[0] /= float(pcs.ne00); - } - barrier(); - memoryBarrierShared(); - const float mean = sum[0]; - - // recenter - const uint y = (gl_WorkGroupID.x*pcs.ne00) + pcs.outOff; // Based from out_ - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - out_[y+i00] = in_[x+i00] - mean; - } - - // VARIANCE - // parallel sum - sum[gl_LocalInvocationID.x] = 0.0; - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - sum[gl_LocalInvocationID.x] += out_[y+i00] * out_[y+i00]; - } - - // reduce - barrier(); - memoryBarrierShared(); - [[unroll]] for (uint i = gl_WorkGroupSize.x/2; i > 0; i /= 2) { - if (gl_LocalInvocationID.x < i) { - sum[gl_LocalInvocationID.x] += sum[gl_LocalInvocationID.x + i]; - } - barrier(); - memoryBarrierShared(); - } - - // broadcast - if (gl_LocalInvocationID.x == 0) { - sum[0] /= float(pcs.ne00); - } - barrier(); - memoryBarrierShared(); - const float variance = sum[0]; - - const float scale = 1.0f/sqrt(variance + pcs.eps); - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - out_[y+i00] *= scale; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp b/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp deleted file mode 100644 index 52a601fe6da6a..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_relu.comp +++ /dev/null @@ -1,21 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; }; -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; -} pcs; - -void main() { - const uint baseIndex = gl_WorkGroupID.x * 4; - - for (uint x = 0; x < 4; x++) { - const uint i = baseIndex + x; - out_[i + pcs.outOff] = 
max(0.0, in_[i + pcs.inOff]); - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp b/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp deleted file mode 100644 index da658c1601e7c..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_rmsnorm.comp +++ /dev/null @@ -1,53 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 512) in; - -layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; - uint ne00; - uint nb01; - float eps; -} pcs; - -shared float sum[gl_WorkGroupSize.x]; - -void main() { - const uint x = (gl_WorkGroupID.x*pcs.nb01/4) + pcs.inOff; // Based from in_ - - // parallel sum - sum[gl_LocalInvocationID.x] = 0.0; - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - sum[gl_LocalInvocationID.x] += in_[x+i00] * in_[x+i00]; - } - - // reduce - barrier(); - memoryBarrierShared(); - [[unroll]] for (uint i = gl_WorkGroupSize.x/2; i > 0; i /= 2) { - if (gl_LocalInvocationID.x < i) { - sum[gl_LocalInvocationID.x] += sum[gl_LocalInvocationID.x + i]; - } - barrier(); - memoryBarrierShared(); - } - - // broadcast - if (gl_LocalInvocationID.x == 0) { - sum[0] /= float(pcs.ne00); - } - barrier(); - memoryBarrierShared(); - - const float scale = 1.0f/sqrt(sum[0] + pcs.eps); - - const uint y = (gl_WorkGroupID.x*pcs.ne00) + pcs.outOff; // Based from out_ - for (uint i00 = gl_LocalInvocationID.x; i00 < pcs.ne00; i00 += gl_WorkGroupSize.x) { - out_[y+i00] = in_[x+i00] * scale; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp b/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp deleted file mode 100644 index 63659cbfe5524..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f16.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "rope_common.comp" - 
-layout(binding = 0) buffer restrict readonly tensorInA { float16_t inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { int inB[]; }; -layout(binding = 2) buffer restrict readonly tensorInC { float inC[]; }; -layout(binding = 3) buffer restrict writeonly tensorOut { float16_t out_[]; }; - -void main() { - const uint i3 = gl_WorkGroupID.z; - const uint i2 = gl_WorkGroupID.y; - const uint i1 = gl_WorkGroupID.x; - - float corr_dims[2]; - rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); - - const float theta_scale = pow(pcs.freq_base, -2.0/pcs.n_dims); - - float theta_base = float(inB[pcs.inBOff + i2]); - float inv_ndims = -1.f/pcs.n_dims; - - float cos_theta; - float sin_theta; - - for (uint i0 = 2*gl_LocalInvocationIndex; i0 < pcs.ne0; i0 += 2*gl_WorkGroupSize.x) { - if (i0 < pcs.n_dims) { - uint ic = i0/2; - - float theta = theta_base * pow(pcs.freq_base, inv_ndims*i0); - - const float freq_factor = pcs.has_freq_factors ? 
inC[pcs.inCOff + ic] : 1.0f; - - rope_yarn(theta/freq_factor, pcs.freq_scale, corr_dims, i0, pcs.ext_factor, pcs.attn_factor, cos_theta, sin_theta); - - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + ic*pcs.nb00) / 2) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + ic*pcs.nb0) / 2) + pcs.outOff; // Based from out_ - - const float x0 = float(inA[src]); - const float x1 = float(inA[src+pcs.n_dims/2]); - - out_[dst_data] = float16_t(x0*cos_theta - x1*sin_theta); - out_[dst_data+pcs.n_dims/2] = float16_t(x0*sin_theta + x1*cos_theta); - } else { - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + i0*pcs.nb00) / 2) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / 2) + pcs.outOff; // Based from out_ - - out_[dst_data] = inA[src]; - out_[dst_data+1] = inA[src+1]; - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp b/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp deleted file mode 100644 index 4df56204d7233..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_rope_neox_f32.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "rope_common.comp" - -layout(binding = 0) buffer restrict readonly tensorInA { float inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { int inB[]; }; -layout(binding = 2) buffer restrict readonly tensorInC { float inC[]; }; -layout(binding = 3) buffer restrict writeonly tensorOut { float out_[]; }; - -void main() { - const uint i3 = gl_WorkGroupID.z; - const uint i2 = gl_WorkGroupID.y; - const uint i1 = gl_WorkGroupID.x; - - float corr_dims[2]; - rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); - - const float theta_scale = pow(pcs.freq_base, -2.0/pcs.n_dims); - - float theta_base = float(inB[pcs.inBOff + i2]); - float inv_ndims = -1.f/pcs.n_dims; - - float cos_theta; - 
float sin_theta; - - for (uint i0 = 2*gl_LocalInvocationIndex; i0 < pcs.ne0; i0 += 2*gl_WorkGroupSize.x) { - if (i0 < pcs.n_dims) { - uint ic = i0/2; - - float theta = theta_base * pow(pcs.freq_base, inv_ndims*i0); - - const float freq_factor = pcs.has_freq_factors ? inC[pcs.inCOff + ic] : 1.0f; - - rope_yarn(theta/freq_factor, pcs.freq_scale, corr_dims, i0, pcs.ext_factor, pcs.attn_factor, cos_theta, sin_theta); - - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + ic*pcs.nb00) / 4) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + ic*pcs.nb0) / 4) + pcs.outOff; // Based from out_ - - const float x0 = inA[src]; - const float x1 = inA[src+pcs.n_dims/2]; - - out_[dst_data] = x0*cos_theta - x1*sin_theta; - out_[dst_data+pcs.n_dims/2] = x0*sin_theta + x1*cos_theta; - } else { - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + i0*pcs.nb00) / 4) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / 4) + pcs.outOff; // Based from out_ - - out_[dst_data] = inA[src]; - out_[dst_data+1] = inA[src+1]; - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp b/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp deleted file mode 100644 index a3c0eda8bd399..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f16.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "rope_common.comp" - -layout(binding = 0) buffer restrict readonly tensorInA { float16_t inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { int inB[]; }; -layout(binding = 2) buffer restrict readonly tensorInC { float inC[]; }; -layout(binding = 3) buffer restrict writeonly tensorOut { float16_t out_[]; }; - -void main() { - const uint i3 = gl_WorkGroupID.z; - const uint i2 = gl_WorkGroupID.y; - const uint i1 = gl_WorkGroupID.x; - - float corr_dims[2]; - rope_yarn_corr_dims(pcs.n_dims, 
pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); - - const float theta_scale = pow(pcs.freq_base, -2.0/pcs.n_dims); - - float theta_base = float(inB[pcs.inBOff + i2]); - float inv_ndims = -1.f/pcs.n_dims; - - float cos_theta; - float sin_theta; - - for (uint i0 = 2*gl_LocalInvocationIndex; i0 < pcs.ne0; i0 += 2*gl_WorkGroupSize.x) { - if (i0 < pcs.n_dims) { - uint ic = i0/2; - - float theta = theta_base * pow(pcs.freq_base, inv_ndims*i0); - - const float freq_factor = pcs.has_freq_factors ? inC[pcs.inCOff + ic] : 1.0f; - - rope_yarn(theta/freq_factor, pcs.freq_scale, corr_dims, i0, pcs.ext_factor, pcs.attn_factor, cos_theta, sin_theta); - - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + i0*pcs.nb00) / 2) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / 2) + pcs.outOff; // Based from out_ - - const float x0 = float(inA[src]); - const float x1 = float(inA[src+1]); - - out_[dst_data] = float16_t(x0*cos_theta - x1*sin_theta); - out_[dst_data+1] = float16_t(x0*sin_theta + x1*cos_theta); - } else { - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + i0*pcs.nb00) / 2) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / 2) + pcs.outOff; // Based from out_ - - out_[dst_data] = inA[src]; - out_[dst_data+1] = inA[src+1]; - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp b/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp deleted file mode 100644 index b7963ae725390..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_rope_norm_f32.comp +++ /dev/null @@ -1,52 +0,0 @@ -#version 450 - -#include "rope_common.comp" - -layout(binding = 0) buffer restrict readonly tensorInA { float inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { int inB[]; }; -layout(binding = 2) buffer restrict readonly tensorInC { float inC[]; }; -layout(binding 
= 3) buffer restrict writeonly tensorOut { float out_[]; }; - -void main() { - const uint i3 = gl_WorkGroupID.z; - const uint i2 = gl_WorkGroupID.y; - const uint i1 = gl_WorkGroupID.x; - - float corr_dims[2]; - rope_yarn_corr_dims(pcs.n_dims, pcs.n_ctx_orig, pcs.freq_base, pcs.beta_fast, pcs.beta_slow, corr_dims); - - const float theta_scale = pow(pcs.freq_base, -2.0/pcs.n_dims); - - float theta_base = float(inB[pcs.inBOff + i2]); - float inv_ndims = -1.f/pcs.n_dims; - - float cos_theta; - float sin_theta; - - for (uint i0 = 2*gl_LocalInvocationIndex; i0 < pcs.ne0; i0 += 2*gl_WorkGroupSize.x) { - if (i0 < pcs.n_dims) { - uint ic = i0/2; - - float theta = theta_base * pow(pcs.freq_base, inv_ndims*i0); - - const float freq_factor = pcs.has_freq_factors ? inC[pcs.inCOff + ic] : 1.0f; - - rope_yarn(theta/freq_factor, pcs.freq_scale, corr_dims, i0, pcs.ext_factor, pcs.attn_factor, cos_theta, sin_theta); - - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + i0*pcs.nb00) / 4) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / 4) + pcs.outOff; // Based from out_ - - const float x0 = inA[src]; - const float x1 = inA[src+1]; - - out_[dst_data] = x0*cos_theta - x1*sin_theta; - out_[dst_data+1] = x0*sin_theta + x1*cos_theta; - } else { - const uint src = uint((i3*pcs.nb03 + i2*pcs.nb02 + i1*pcs.nb01 + i0*pcs.nb00) / 4) + pcs.inAOff; // Based from in - const uint dst_data = uint((i3*pcs.nb3 + i2*pcs.nb2 + i1*pcs.nb1 + i0*pcs.nb0) / 4) + pcs.outOff; // Based from out_ - - out_[dst_data] = inA[src]; - out_[dst_data+1] = inA[src+1]; - } - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp b/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp deleted file mode 100644 index bdae267382093..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_scale.comp +++ /dev/null @@ -1,19 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout(binding = 0) 
buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; - float scale; -} pcs; - -void main() { - const uint i = gl_WorkGroupID.x; - out_[i + pcs.outOff] = in_[i + pcs.inOff] * pcs.scale; -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp b/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp deleted file mode 100644 index ada69754b2c14..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_scale_8.comp +++ /dev/null @@ -1,23 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; - float scale; -} pcs; - -void main() { - const uint baseIndex = gl_WorkGroupID.x * 8; - - for (uint x = 0; x < 8; x++) { - const uint i = baseIndex + x; - out_[i + pcs.outOff] = in_[i + pcs.inOff] * pcs.scale; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp b/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp deleted file mode 100644 index 0fb8e4b74056d..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_silu.comp +++ /dev/null @@ -1,22 +0,0 @@ -#version 450 - -#include "common.comp" - -layout(local_size_x = 1) in; - -layout(binding = 0) buffer restrict readonly tensorIn { float in_[]; }; -layout(binding = 1) buffer restrict writeonly tensorOut { float out_[]; }; -layout(push_constant) uniform PushConstants { - uint inOff; - uint outOff; -} pcs; - -void main() { - const uint baseIndex = gl_WorkGroupID.x * 4; - - for (uint x = 0; x < 4; x++) { - const uint i = baseIndex + x; - const float y = in_[i + pcs.inOff]; - out_[i + pcs.outOff] = y / (1.0 + exp(-y)); - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp 
b/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp deleted file mode 100644 index 4165295bf4b3c..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/op_softmax.comp +++ /dev/null @@ -1,72 +0,0 @@ -// TODO: implement multi-simd softmax (llama.cpp commit e16b9fa4) - -#version 450 - -#include "common.comp" - -layout(local_size_x_id = 0) in; - -layout(binding = 0) buffer restrict readonly tensorInA { float inA[]; }; -layout(binding = 1) buffer restrict readonly tensorInB { float inB[]; }; -layout(binding = 2) buffer restrict writeonly tensorOut { float out_[]; }; - -layout(push_constant) uniform PushConstants { - uint inAOff; - uint inBOff; - uint outOff; - int ne00; - int ne01; - int ne02; - float scale; - float max_bias; - float m0; - float m1; - uint n_head_log2; - int mask; -} pcs; - -void main() { - if (gl_SubgroupInvocationID > 31) - return; - - const uint i03 = gl_WorkGroupID.z; - const uint i02 = gl_WorkGroupID.y; - const uint i01 = gl_WorkGroupID.x; - - const uint extra_off = i03*pcs.ne02*pcs.ne01*pcs.ne00 + i02*pcs.ne01*pcs.ne00 + i01*pcs.ne00; - const uint psrc0 = extra_off + pcs.inAOff; // Based from inA - const uint pmask = i01*pcs.ne00 + pcs.inBOff; // Based from inB - const uint pdst = extra_off + pcs.outOff; // Based from out_ - - float slope = 1.0f; - - // ALiBi - if (pcs.max_bias > 0.0f) { - int64_t h = i02; - - float base = h < pcs.n_head_log2 ? pcs.m0 : pcs.m1; - int64_t exp = h < pcs.n_head_log2 ? h + 1 : 2*(h - pcs.n_head_log2) + 1; - - slope = pow(base, float(exp)); - } - - // parallel max - float localMax = uintBitsToFloat(0xFF800000); - for (uint i00 = gl_SubgroupInvocationID.x; i00 < pcs.ne00; i00 += 32) { - localMax = max(localMax, inA[psrc0 + i00]*pcs.scale + (pcs.mask!=0 ? 
slope*inB[pmask + i00] : 0.0f)); - } - float max_ = subgroupMax(localMax); - - // parallel sum - float localSum = 0.0f; - for (uint i00 = gl_SubgroupInvocationID.x; i00 < pcs.ne00; i00 += 32) { - const float exp_psrc0 = exp(inA[psrc0 + i00]*pcs.scale + (pcs.mask!=0 ? slope*inB[pmask + i00] : 0.0f) - max_); - localSum += exp_psrc0; - out_[pdst + i00] = exp_psrc0; - } - - const float sum = subgroupAdd(localSum); - for (uint i00 = gl_SubgroupInvocationID.x; i00 < pcs.ne00; i00 += 32) { - out_[pdst + i00] /= sum; - } -} diff --git a/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp b/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp deleted file mode 100644 index 0fca640dcc232..0000000000000 --- a/ggml/src/ggml-kompute/kompute-shaders/rope_common.comp +++ /dev/null @@ -1,71 +0,0 @@ -#include "common.comp" - -#define GGML_ROPE_TYPE_NEOX 2 - -// TODO: use a local size of 32 or more (Metal uses 1024) -layout(local_size_x = 1) in; - -layout (push_constant) uniform parameter { - uint inAOff; - uint inBOff; - uint inCOff; - uint outOff; - int n_dims; - int mode; - int n_ctx_orig; - float freq_base; - float freq_scale; - bool has_freq_factors; - float ext_factor; - float attn_factor; - float beta_fast; - float beta_slow; - uint nb00; - uint nb01; - uint nb02; - uint nb03; - int ne0; - uint nb0; - uint nb1; - uint nb2; - uint nb3; -} pcs; - -float rope_yarn_ramp(const float low, const float high, const float i0) { - const float y = (i0 / 2 - low) / max(0.001f, high - low); - return 1.0f - min(1.0f, max(0.0f, y)); -} - -// YaRN algorithm based on LlamaYaRNScaledRotaryEmbedding.py from https://github.com/jquesnelle/yarn -// MIT licensed. Copyright (c) 2023 Jeffrey Quesnelle and Bowen Peng. 
-void rope_yarn( - float theta_extrap, float freq_scale, float corr_dims[2], float i0, float ext_factor, float mscale, - out float cos_theta, out float sin_theta -) { - // Get n-d rotational scaling corrected for extrapolation - float theta_interp = freq_scale * theta_extrap; - float theta = theta_interp; - if (ext_factor != 0.0f) { - float ramp_mix = rope_yarn_ramp(corr_dims[0], corr_dims[1], i0) * ext_factor; - theta = theta_interp * (1 - ramp_mix) + theta_extrap * ramp_mix; - - // Get n-d magnitude scaling corrected for interpolation - mscale *= 1.0f + 0.1f * log(1.0f / freq_scale); - } - cos_theta = cos(theta) * mscale; - sin_theta = sin(theta) * mscale; -} - -// Apparently solving `n_rot = 2pi * x * base^((2 * max_pos_emb) / n_dims)` for x, we get -// `corr_fac(n_rot) = n_dims * log(max_pos_emb / (n_rot * 2pi)) / (2 * log(base))` -float rope_yarn_corr_factor(int n_dims, int n_ctx_orig, float n_rot, float base) { - return n_dims * log(n_ctx_orig / (n_rot * TWOPI_F)) / (2 * log(base)); -} - -void rope_yarn_corr_dims( - int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, out float dims[2] -) { - // start and end correction dims - dims[0] = max(0.0f, floor(rope_yarn_corr_factor(n_dims, n_ctx_orig, beta_fast, freq_base))); - dims[1] = min(n_dims - 1.0f, ceil(rope_yarn_corr_factor(n_dims, n_ctx_orig, beta_slow, freq_base))); -} diff --git a/ggml/src/ggml-metal/CMakeLists.txt b/ggml/src/ggml-metal/CMakeLists.txt index 77187efc1756d..0ca8a3c55ec44 100644 --- a/ggml/src/ggml-metal/CMakeLists.txt +++ b/ggml/src/ggml-metal/CMakeLists.txt @@ -71,7 +71,9 @@ else() # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 # note: unfortunately, we have to call it default.metallib instead of ggml.metallib # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 - set(XC_FLAGS -fno-fast-math -fno-inline -g) + # note: adding -g causes segmentation fault during compile + #set(XC_FLAGS -fno-fast-math -fno-inline -g) + 
set(XC_FLAGS -fno-fast-math -fno-inline) else() set(XC_FLAGS -O3) endif() @@ -90,7 +92,7 @@ else() add_custom_command( OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o - | - xcrun -sdk macosx metallib - -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib + xcrun -sdk macosx metallib - -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal DEPENDS ggml-metal.metal ${METALLIB_COMMON} diff --git a/ggml/src/ggml-metal/ggml-metal-impl.h b/ggml/src/ggml-metal/ggml-metal-impl.h index 17eab976f3ad1..752d55c216604 100644 --- a/ggml/src/ggml-metal/ggml-metal-impl.h +++ b/ggml/src/ggml-metal/ggml-metal-impl.h @@ -229,7 +229,11 @@ typedef struct { uint64_t nb21; uint64_t nb22; uint64_t nb23; + int32_t ne32; + int32_t ne33; uint64_t nb31; + uint64_t nb32; + uint64_t nb33; int32_t ne1; int32_t ne2; float scale; @@ -422,6 +426,17 @@ typedef struct { int32_t KHW; // KH * KW, pre-computed on CPU to save GPU resources } ggml_metal_kargs_im2col; +typedef struct{ + int32_t ne00; + uint64_t nb01; + int32_t ne10; + uint64_t nb11; + int32_t ne0; + uint64_t nb1; + int32_t i00; + int32_t i10; +} ggml_metal_kargs_glu; + typedef struct { int64_t ne00; int64_t ne01; @@ -450,9 +465,21 @@ typedef struct { } ggml_metal_kargs_sum_rows; typedef struct { - int64_t ne00; - int64_t ne01; - int64_t ne02; + int32_t ne00; + int32_t ne01; + int32_t ne02; + uint64_t nb01; + uint64_t nb02; + uint64_t nb03; + int32_t ne11; + int32_t ne12; + int32_t ne13; + uint64_t nb11; + uint64_t nb12; + uint64_t nb13; + uint64_t nb1; + uint64_t nb2; + uint64_t nb3; float scale; float max_bias; float m0; @@ -488,26 +515,25 @@ typedef struct { typedef struct { int64_t d_state; int64_t d_inner; + int64_t n_head; + int64_t n_group; int64_t n_seq_tokens; int64_t n_seqs; - uint64_t nb00; 
uint64_t nb01; uint64_t nb02; - uint64_t nb10; + uint64_t nb03; uint64_t nb11; uint64_t nb12; uint64_t nb13; - uint64_t nb20; uint64_t nb21; uint64_t nb22; - uint64_t nb30; uint64_t nb31; - uint64_t nb40; uint64_t nb41; uint64_t nb42; - uint64_t nb50; + uint64_t nb43; uint64_t nb51; uint64_t nb52; + uint64_t nb53; } ggml_metal_kargs_ssm_scan; typedef struct { @@ -521,6 +547,22 @@ typedef struct { uint64_t nb2; } ggml_metal_kargs_get_rows; +typedef struct { + int32_t nk0; + int32_t ne01; + uint64_t nb01; + uint64_t nb02; + uint64_t nb03; + int32_t ne11; + int32_t ne12; + uint64_t nb10; + uint64_t nb11; + uint64_t nb12; + uint64_t nb1; + uint64_t nb2; + uint64_t nb3; +} ggml_metal_kargs_set_rows; + typedef struct { int64_t ne00; int64_t ne01; diff --git a/ggml/src/ggml-metal/ggml-metal.m b/ggml/src/ggml-metal/ggml-metal.m index bc93bc633a49b..44ddc69d08f1c 100644 --- a/ggml/src/ggml-metal/ggml-metal.m +++ b/ggml/src/ggml-metal/ggml-metal.m @@ -48,22 +48,28 @@ int mtl_device_ref_count; id mtl_library; + NSLock * mtl_lock; + bool has_simdgroup_reduction; bool has_simdgroup_mm; bool has_residency_sets; bool has_bfloat; bool use_bfloat; + size_t max_size; + char name[128]; } g_ggml_ctx_dev_main = { /*.mtl_device =*/ nil, /*.mtl_device_ref_count =*/ 0, /*.mtl_library =*/ nil, + /*.mtl_lock =*/ nil, /*.has_simdgroup_reduction =*/ false, /*.has_simdgroup_mm =*/ false, /*.has_residency_sets =*/ false, /*.has_bfloat =*/ false, /*.use_bfloat =*/ false, + /*.max_size =*/ 0, /*.name =*/ "", }; @@ -71,6 +77,10 @@ static id ggml_backend_metal_device_acq(struct ggml_backend_metal_device_context * ctx) { assert(ctx != NULL); + if (ctx->mtl_lock == nil) { + ctx->mtl_lock = [[NSLock alloc] init]; + } + if (ctx->mtl_device == nil) { ctx->mtl_device = MTLCreateSystemDefaultDevice(); } @@ -94,6 +104,8 @@ ctx->use_bfloat = false; #endif + ctx->max_size = ctx->mtl_device.maxBufferLength; + strncpy(ctx->name, [[ctx->mtl_device name] UTF8String], sizeof(ctx->name) - 1); } @@ -110,6 +122,11 
@@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte ctx->mtl_device_ref_count--; if (ctx->mtl_device_ref_count == 0) { + if (ctx->mtl_lock) { + [ctx->mtl_lock release]; + ctx->mtl_lock = nil; + } + if (ctx->mtl_library) { [ctx->mtl_library release]; ctx->mtl_library = nil; @@ -156,6 +173,12 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte GGML_METAL_KERNEL_TYPE_SILU, GGML_METAL_KERNEL_TYPE_SILU_4, GGML_METAL_KERNEL_TYPE_ELU, + GGML_METAL_KERNEL_TYPE_ABS, + GGML_METAL_KERNEL_TYPE_SGN, + GGML_METAL_KERNEL_TYPE_STEP, + GGML_METAL_KERNEL_TYPE_HARDSWISH, + GGML_METAL_KERNEL_TYPE_HARDSIGMOID, + GGML_METAL_KERNEL_TYPE_EXP, GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16, GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4, GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32, @@ -185,20 +208,33 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL, GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_XS, GGML_METAL_KERNEL_TYPE_GET_ROWS_I32, + GGML_METAL_KERNEL_TYPE_SET_ROWS_F32, + GGML_METAL_KERNEL_TYPE_SET_ROWS_F16, + GGML_METAL_KERNEL_TYPE_SET_ROWS_BF16, + GGML_METAL_KERNEL_TYPE_SET_ROWS_Q8_0, + GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_0, + GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_1, + GGML_METAL_KERNEL_TYPE_SET_ROWS_Q5_0, + GGML_METAL_KERNEL_TYPE_SET_ROWS_Q5_1, + GGML_METAL_KERNEL_TYPE_SET_ROWS_IQ4_NL, GGML_METAL_KERNEL_TYPE_RMS_NORM, GGML_METAL_KERNEL_TYPE_L2_NORM, GGML_METAL_KERNEL_TYPE_GROUP_NORM, GGML_METAL_KERNEL_TYPE_NORM, GGML_METAL_KERNEL_TYPE_SSM_CONV_F32, GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32, + GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32_GROUP, GGML_METAL_KERNEL_TYPE_RWKV_WKV6_F32, GGML_METAL_KERNEL_TYPE_RWKV_WKV7_F32, GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32, + GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32_C4, GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32, + GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_C4, GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW, GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_L4, GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F16, 
GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32, + GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4, GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW, GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4, GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_BF16, @@ -497,7 +533,13 @@ static void ggml_backend_metal_device_rel(struct ggml_backend_metal_device_conte GGML_METAL_KERNEL_TYPE_SIN, GGML_METAL_KERNEL_TYPE_COS, GGML_METAL_KERNEL_TYPE_NEG, + GGML_METAL_KERNEL_TYPE_REGLU, + GGML_METAL_KERNEL_TYPE_GEGLU, + GGML_METAL_KERNEL_TYPE_SWIGLU, + GGML_METAL_KERNEL_TYPE_GEGLU_ERF, + GGML_METAL_KERNEL_TYPE_GEGLU_QUICK, GGML_METAL_KERNEL_TYPE_SUM_ROWS, + GGML_METAL_KERNEL_TYPE_MEAN, GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32, GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, GGML_METAL_KERNEL_TYPE_ARGMAX, @@ -976,7 +1018,7 @@ @implementation GGMLMetalClass struct ggml_backend_metal_context * ctx = calloc(1, sizeof(struct ggml_backend_metal_context)); struct ggml_backend_metal_device_context * ctx_dev = dev->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + id device = ctx_dev->mtl_device; GGML_LOG_INFO("%s: picking default device: %s\n", __func__, [[device name] UTF8String]); @@ -990,9 +1032,16 @@ @implementation GGMLMetalClass ctx->d_queue = dispatch_queue_create("ggml-metal", DISPATCH_QUEUE_CONCURRENT); // load library - if (ctx_dev->mtl_library == nil) { - ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat); + { + [ctx_dev->mtl_lock lock]; + + if (ctx_dev->mtl_library == nil) { + ctx_dev->mtl_library = ggml_metal_load_library(device, ctx_dev->use_bfloat); + } + + [ctx_dev->mtl_lock unlock]; } + id metal_library = ctx_dev->mtl_library; if (metal_library == nil) { GGML_LOG_ERROR("%s: error: metal library is nil\n", __func__); @@ -1112,6 +1161,12 @@ @implementation GGMLMetalClass GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU, silu, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SILU_4, silu_4, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ELU, elu, true); + 
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ABS, abs, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SGN, sgn, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_STEP, step, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_HARDSWISH, hardswish, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_HARDSIGMOID, hardsigmoid, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_EXP, exp, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16, soft_max_f16, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F16_4, soft_max_f16_4, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SOFT_MAX_F32, soft_max_f32, has_simdgroup_reduction); @@ -1141,20 +1196,33 @@ @implementation GGMLMetalClass GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_NL, get_rows_iq4_nl, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_IQ4_XS, get_rows_iq4_xs, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GET_ROWS_I32, get_rows_i32, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_F32, set_rows_f32, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_F16, set_rows_f16, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_BF16, set_rows_bf16, use_bfloat); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q8_0, set_rows_q8_0, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_0, set_rows_q4_0, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_1, set_rows_q4_1, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q5_0, set_rows_q5_0, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_Q5_1, set_rows_q5_1, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SET_ROWS_IQ4_NL, set_rows_iq4_nl, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RMS_NORM, rms_norm, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_L2_NORM, l2_norm, has_simdgroup_reduction); 
GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GROUP_NORM, group_norm, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NORM, norm, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_CONV_F32, ssm_conv_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32, ssm_scan_f32, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32_GROUP, ssm_scan_f32_group, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RWKV_WKV6_F32, rwkv_wkv6_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_RWKV_WKV7_F32, rwkv_wkv7_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32, mul_mv_f32_f32, has_simdgroup_reduction); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32_C4, mul_mv_f32_f32_c4, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32, mul_mv_bf16_f32, has_simdgroup_reduction && use_bfloat); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4, mul_mv_bf16_f32_c4, use_bfloat); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW, mul_mv_bf16_f32_1row, has_simdgroup_reduction && use_bfloat); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4, mul_mv_bf16_f32_l4, has_simdgroup_reduction && use_bfloat); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_BF16, mul_mv_bf16_bf16, has_simdgroup_reduction && use_bfloat); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32, mul_mv_f16_f32, has_simdgroup_reduction); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_C4, mul_mv_f16_f32_c4, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW, mul_mv_f16_f32_1row, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_L4, mul_mv_f16_f32_l4, has_simdgroup_reduction); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F16, mul_mv_f16_f16, has_simdgroup_reduction); @@ -1453,7 +1521,13 @@ @implementation GGMLMetalClass GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SIN, sin, 
true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_COS, cos, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_NEG, neg, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_REGLU, reglu, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GEGLU, geglu, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SWIGLU, swiglu, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GEGLU_ERF, geglu_erf, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_GEGLU_QUICK, geglu_quick, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_SUM_ROWS, sum_rows, true); + GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_MEAN, mean, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_ARGMAX, argmax, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_AVG_F32, pool_2d_avg_f32, true); GGML_METAL_ADD_KERNEL(GGML_METAL_KERNEL_TYPE_POOL_2D_MAX_F32, pool_2d_max_f32, true); @@ -1603,6 +1677,10 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex const bool use_bfloat = ctx_dev->use_bfloat; if (!use_bfloat) { + if (op->type == GGML_TYPE_BF16) { + return false; + } + for (size_t i = 0, n = 3; i < n; ++i) { if (op->src[i] != NULL && op->src[i]->type == GGML_TYPE_BF16) { return false; @@ -1622,10 +1700,27 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_ELU: case GGML_UNARY_OP_NEG: + case GGML_UNARY_OP_ABS: + case GGML_UNARY_OP_SGN: + case GGML_UNARY_OP_STEP: + case GGML_UNARY_OP_HARDSWISH: + case GGML_UNARY_OP_HARDSIGMOID: + case GGML_UNARY_OP_EXP: return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32; default: return false; } + case GGML_OP_GLU: + switch (ggml_get_glu_op(op)) { + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + return ggml_is_contiguous_1(op->src[0]) && op->src[0]->type == GGML_TYPE_F32; + default: + return false; + } case GGML_OP_NONE: case 
GGML_OP_RESHAPE: case GGML_OP_VIEW: @@ -1653,9 +1748,10 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex case GGML_OP_LOG: return false; // TODO: implement case GGML_OP_SUM_ROWS: + case GGML_OP_MEAN: case GGML_OP_SOFT_MAX: case GGML_OP_GROUP_NORM: - return has_simdgroup_reduction && ggml_is_contiguous(op->src[0]); + return has_simdgroup_reduction && ggml_is_contiguous_rows(op->src[0]); case GGML_OP_RMS_NORM: case GGML_OP_L2_NORM: return has_simdgroup_reduction && (op->ne[0] % 4 == 0 && ggml_is_contiguous_1(op->src[0])); @@ -1771,6 +1867,27 @@ static bool ggml_metal_supports_op(const struct ggml_backend_metal_device_contex { return op->ne[3] == 1; } + case GGML_OP_SET_ROWS: + { + if (op->src[0]->type != GGML_TYPE_F32) { + return false; + } + + switch (op->type) { + case GGML_TYPE_F32: + case GGML_TYPE_F16: + case GGML_TYPE_BF16: + case GGML_TYPE_Q8_0: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_IQ4_NL: + return true; + default: + return false; + }; + } default: return false; } @@ -2157,7 +2274,9 @@ static bool ggml_metal_encode_node( GGML_ASSERT(ggml_is_contiguous(src0)); float scale; - memcpy(&scale, dst->op_params, sizeof(scale)); + float bias; + memcpy(&scale, ((const int32_t *) dst->op_params) + 0, sizeof(float)); + memcpy(&bias, ((const int32_t *) dst->op_params) + 1, sizeof(float)); int64_t n = ggml_nelements(dst); @@ -2174,6 +2293,7 @@ static bool ggml_metal_encode_node( [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; [encoder setBytes:&scale length:sizeof(scale) atIndex:2]; + [encoder setBytes:&bias length:sizeof(bias) atIndex:3]; [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; } break; @@ -2337,12 +2457,146 @@ static bool ggml_metal_encode_node( [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; } break; + 
case GGML_UNARY_OP_ABS: + { + id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_ABS].pipeline; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_SGN: + { + id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SGN].pipeline; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_STEP: + { + id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_STEP].pipeline; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_HARDSWISH: + { + id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_HARDSWISH].pipeline; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_HARDSIGMOID: + { + id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_HARDSIGMOID].pipeline; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder 
dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; + case GGML_UNARY_OP_EXP: + { + id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_EXP].pipeline; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; + + const int64_t n = ggml_nelements(dst); + + [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } break; default: { GGML_LOG_WARN("%s: node %3d, op = %8s not implemented\n", __func__, idx, ggml_op_name(dst->op)); GGML_ABORT("fatal error"); } } break; + case GGML_OP_GLU: + { + GGML_ASSERT(ggml_is_contiguous_1(src0)); + + if (src1) { + GGML_ASSERT(ggml_are_same_shape(src0, src1)); + } + + id pipeline = nil; + + switch (ggml_get_glu_op(node)) { + case GGML_GLU_OP_REGLU: + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_REGLU].pipeline; + break; + case GGML_GLU_OP_GEGLU: + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GEGLU].pipeline; + break; + case GGML_GLU_OP_SWIGLU: + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SWIGLU].pipeline; + break; + case GGML_GLU_OP_GEGLU_ERF: + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GEGLU_ERF].pipeline; + break; + case GGML_GLU_OP_GEGLU_QUICK: + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_GEGLU_QUICK].pipeline; + break; + default: + GGML_ABORT("fatal error"); + } + + const int32_t swp = ((const int32_t *) dst->op_params)[1]; + + const int32_t i00 = swp ? ne0 : 0; + const int32_t i10 = swp ? 0 : ne0; + + ggml_metal_kargs_glu args = { + /*.ne00 =*/ ne00, + /*.nb01 =*/ nb01, + /*.ne10 =*/ src1 ? ne10 : ne00, + /*.nb11 =*/ src1 ? nb11 : nb01, + /*.ne0 =*/ ne0, + /*.nb1 =*/ nb1, + /*.i00 =*/ src1 ? 0 : i00, + /*.i10 =*/ src1 ? 
0 : i10, + }; + + [encoder setComputePipelineState:pipeline]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; + if (src1) { + [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1]; + } else { + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:1]; + } + [encoder setBuffer:id_dst offset:offs_dst atIndex:2]; + [encoder setBytes:&args length:sizeof(args) atIndex:3]; + + const int64_t nrows = ggml_nrows(src0); + + const int32_t nth = MIN((int) pipeline.maxTotalThreadsPerThreadgroup, ne00/2); + + [encoder dispatchThreadgroups:MTLSizeMake(nrows, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; + } break; case GGML_OP_SQR: { GGML_ASSERT(ggml_is_contiguous(src0)); @@ -2400,11 +2654,31 @@ static bool ggml_metal_encode_node( [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; } break; case GGML_OP_SUM_ROWS: + case GGML_OP_MEAN: { GGML_ASSERT(src0->nb[0] == ggml_type_size(src0->type)); - id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SUM_ROWS].pipeline; + id pipeline = nil; + switch (dst->op) { + case GGML_OP_SUM_ROWS: + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SUM_ROWS].pipeline; + break; + case GGML_OP_MEAN: + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MEAN].pipeline; + break; + default: + GGML_ABORT("fatal error"); + } + + int nth = 32; // SIMD width + + while (nth < ne00 && nth < (int) pipeline.maxTotalThreadsPerThreadgroup) { + nth *= 2; + } + + nth = MIN(nth, (int) pipeline.maxTotalThreadsPerThreadgroup); + nth = MIN(nth, ne00); ggml_metal_kargs_sum_rows args = { /*.ne00 =*/ ne00, @@ -2434,11 +2708,12 @@ static bool ggml_metal_encode_node( }; [encoder setComputePipelineState:pipeline]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:1]; - [encoder setBytes:&args length:sizeof(args) atIndex:2]; + [encoder setBytes:&args length:sizeof(args) atIndex:0]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:1]; + [encoder setBuffer:id_dst 
offset:offs_dst atIndex:2]; + [encoder setThreadgroupMemoryLength:32*sizeof(float) atIndex:0]; - [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; } break; case GGML_OP_SOFT_MAX: { @@ -2476,10 +2751,7 @@ static bool ggml_metal_encode_node( memcpy(&scale, ((const int32_t *) dst->op_params) + 0, sizeof(scale)); memcpy(&max_bias, ((const int32_t *) dst->op_params) + 1, sizeof(max_bias)); - const int64_t nrows_x = ggml_nrows(src0); - const int64_t nrows_y = src0->ne[1]; - - const uint32_t n_head = nrows_x/nrows_y; + const uint32_t n_head = src0->ne[2]; const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head)); const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); @@ -2539,6 +2811,18 @@ static bool ggml_metal_encode_node( /*.ne00 =*/ ne00, /*.ne01 =*/ ne01, /*.ne02 =*/ ne02, + /*.nb01 =*/ nb01, + /*.nb02 =*/ nb02, + /*.nb03 =*/ nb03, + /*.ne11 =*/ ne11, + /*.ne12 =*/ ne12, + /*.ne13 =*/ ne13, + /*.nb11 =*/ nb11, + /*.nb12 =*/ nb12, + /*.nb13 =*/ nb13, + /*.nb1 =*/ nb1, + /*.nb2 =*/ nb2, + /*.nb3 =*/ nb3, /*.scale =*/ scale, /*.max_bias =*/ max_bias, /*.m0 =*/ m0, @@ -2558,7 +2842,7 @@ static bool ggml_metal_encode_node( [encoder setThreadgroupMemoryLength:32*sizeof(float) atIndex:0]; - [encoder dispatchThreadgroups:MTLSizeMake(ne01*ne02*ne03, 1, 1) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; + [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; } break; case GGML_OP_DIAG_MASK_INF: { @@ -2632,71 +2916,91 @@ static bool ggml_metal_encode_node( struct ggml_tensor * src3 = node->src[3]; struct ggml_tensor * src4 = node->src[4]; struct ggml_tensor * src5 = node->src[5]; + struct ggml_tensor * src6 = node->src[6]; GGML_ASSERT(src3); GGML_ASSERT(src4); GGML_ASSERT(src5); + GGML_ASSERT(src6); size_t offs_src3 = 0; size_t offs_src4 = 0; 
size_t offs_src5 = 0; + size_t offs_src6 = 0; id id_src3 = src3 ? ggml_metal_get_buffer(src3, &offs_src3) : nil; id id_src4 = src4 ? ggml_metal_get_buffer(src4, &offs_src4) : nil; id id_src5 = src5 ? ggml_metal_get_buffer(src5, &offs_src5) : nil; + id id_src6 = src6 ? ggml_metal_get_buffer(src6, &offs_src6) : nil; - const int64_t ne30 = src3->ne[0]; GGML_UNUSED(ne30); + const int64_t ne30 = src3->ne[0]; const int64_t ne31 = src3->ne[1]; GGML_UNUSED(ne31); - const uint64_t nb30 = src3->nb[0]; + const uint64_t nb30 = src3->nb[0]; GGML_UNUSED(nb30); const uint64_t nb31 = src3->nb[1]; const int64_t ne40 = src4->ne[0]; GGML_UNUSED(ne40); - const int64_t ne41 = src4->ne[1]; GGML_UNUSED(ne41); + const int64_t ne41 = src4->ne[1]; const int64_t ne42 = src4->ne[2]; GGML_UNUSED(ne42); + const int64_t ne43 = src4->ne[3]; GGML_UNUSED(ne43); - const uint64_t nb40 = src4->nb[0]; + const uint64_t nb40 = src4->nb[0]; GGML_UNUSED(nb40); const uint64_t nb41 = src4->nb[1]; const uint64_t nb42 = src4->nb[2]; + const uint64_t nb43 = src4->nb[3]; const int64_t ne50 = src5->ne[0]; GGML_UNUSED(ne50); const int64_t ne51 = src5->ne[1]; GGML_UNUSED(ne51); const int64_t ne52 = src5->ne[2]; GGML_UNUSED(ne52); + const int64_t ne53 = src5->ne[3]; GGML_UNUSED(ne53); - const uint64_t nb50 = src5->nb[0]; + const uint64_t nb50 = src5->nb[0]; GGML_UNUSED(nb50); const uint64_t nb51 = src5->nb[1]; const uint64_t nb52 = src5->nb[2]; + const uint64_t nb53 = src5->nb[3]; + + const int64_t ne60 = src6->ne[0]; GGML_UNUSED(ne60); + + const uint64_t nb60 = src6->nb[0]; GGML_UNUSED(nb60); const int64_t d_state = ne00; const int64_t d_inner = ne01; - const int64_t n_seq_tokens = ne11; - const int64_t n_seqs = ne02; + const int64_t n_head = ne02; + const int64_t n_group = ne41; + const int64_t n_seq_tokens = ne12; + const int64_t n_seqs = ne13; - id pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32].pipeline; + id pipeline = nil; + + if (ne30 == 1) { + // Mamba-2 + pipeline = 
ctx->kernels[GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32_GROUP].pipeline; + } else { + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SSM_SCAN_F32].pipeline; + } ggml_metal_kargs_ssm_scan args = { - /*.d_state =*/ d_state, - /*.d_inner =*/ d_inner, + /*.d_state =*/ d_state, + /*.d_inner =*/ d_inner, + /*.n_head =*/ n_head, + /*.n_group =*/ n_group, /*.n_seq_tokens =*/ n_seq_tokens, - /*.n_seqs =*/ n_seqs, - /*.nb00 =*/ nb00, - /*.nb01 =*/ nb01, - /*.nb02 =*/ nb02, - /*.nb10 =*/ nb10, - /*.nb11 =*/ nb11, - /*.nb12 =*/ nb12, - /*.nb13 =*/ nb13, - /*.nb20 =*/ nb20, - /*.nb21 =*/ nb21, - /*.nb22 =*/ nb22, - /*.nb30 =*/ nb30, - /*.nb31 =*/ nb31, - /*.nb40 =*/ nb40, - /*.nb41 =*/ nb41, - /*.nb42 =*/ nb42, - /*.nb50 =*/ nb50, - /*.nb51 =*/ nb51, - /*.nb52 =*/ nb52, + /*.n_seqs =*/ n_seqs, + /*.nb01 =*/ nb01, + /*.nb02 =*/ nb02, + /*.nb03 =*/ nb03, + /*.nb11 =*/ nb11, + /*.nb12 =*/ nb12, + /*.nb13 =*/ nb13, + /*.nb21 =*/ nb21, + /*.nb22 =*/ nb22, + /*.nb31 =*/ nb31, + /*.nb41 =*/ nb41, + /*.nb42 =*/ nb42, + /*.nb43 =*/ nb43, + /*.nb51 =*/ nb51, + /*.nb52 =*/ nb52, + /*.nb53 =*/ nb53, }; [encoder setComputePipelineState:pipeline]; @@ -2706,10 +3010,17 @@ static bool ggml_metal_encode_node( [encoder setBuffer:id_src3 offset:offs_src3 atIndex:3]; [encoder setBuffer:id_src4 offset:offs_src4 atIndex:4]; [encoder setBuffer:id_src5 offset:offs_src5 atIndex:5]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:6]; - [encoder setBytes:&args length:sizeof(args) atIndex:7]; + [encoder setBuffer:id_src6 offset:offs_src6 atIndex:6]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:7]; + [encoder setBytes:&args length:sizeof(args) atIndex:8]; - [encoder dispatchThreadgroups:MTLSizeMake(d_inner, n_seqs, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + if (ne30 == 1) { + // Mamba-2 + [encoder dispatchThreadgroups:MTLSizeMake(d_inner, n_head, n_seqs) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } else { + GGML_ASSERT(d_inner == 1); + [encoder dispatchThreadgroups:MTLSizeMake(n_head, 
n_seqs, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; + } } break; case GGML_OP_RWKV_WKV6: { @@ -3063,14 +3374,23 @@ static bool ggml_metal_encode_node( nsg = 1; nr0 = 1; nr1 = 4; - pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32].pipeline; + if (ne00 == 4) { + nr0 = 32; + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32_C4].pipeline; + } else { + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_F32_F32].pipeline; + } } break; case GGML_TYPE_F16: { nsg = 1; nr0 = 1; if (src1t == GGML_TYPE_F32) { - if (ne11 * ne12 < 4) { + if (ne00 == 4) { + nr0 = 32; + nr1 = 4; + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_C4].pipeline; + } else if (ne11 * ne12 < 4) { pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_1ROW].pipeline; } else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) { pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_F16_F32_L4].pipeline; @@ -3089,7 +3409,11 @@ static bool ggml_metal_encode_node( nsg = 1; nr0 = 1; if (src1t == GGML_TYPE_F32) { - if (ne11 * ne12 < 4) { + if (ne00 == 4) { + nr0 = 32; + nr1 = 4; + pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_C4].pipeline; + } else if (ne11 * ne12 < 4) { pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_1ROW].pipeline; } else if (ne00 >= 128 && ne01 >= 8 && ne00%4 == 0) { pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_MUL_MV_BF16_F32_L4].pipeline; @@ -3710,13 +4034,74 @@ static bool ggml_metal_encode_node( }; [encoder setComputePipelineState:pipeline]; - [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0]; - [encoder setBuffer:id_src1 offset:offs_src1 atIndex:1]; - [encoder setBuffer:id_dst offset:offs_dst atIndex:2]; - [encoder setBytes:&args length:sizeof(args) atIndex:3]; + [encoder setBytes:&args length:sizeof(args) atIndex:0]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:1]; + [encoder setBuffer:id_src1 offset:offs_src1 atIndex:2]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:3]; [encoder 
dispatchThreadgroups:MTLSizeMake(ne10, ne11, 1) threadsPerThreadgroup:MTLSizeMake(32, 1, 1)]; } break; + case GGML_OP_SET_ROWS: + { + id pipeline = nil; + + switch (dst->type) { + case GGML_TYPE_F32: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_F32 ].pipeline; break; + case GGML_TYPE_F16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_F16 ].pipeline; break; + case GGML_TYPE_BF16: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_BF16 ].pipeline; break; + case GGML_TYPE_Q8_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_Q8_0 ].pipeline; break; + case GGML_TYPE_Q4_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_0 ].pipeline; break; + case GGML_TYPE_Q4_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_Q4_1 ].pipeline; break; + case GGML_TYPE_Q5_0: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_Q5_0 ].pipeline; break; + case GGML_TYPE_Q5_1: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_Q5_1 ].pipeline; break; + case GGML_TYPE_IQ4_NL: pipeline = ctx->kernels[GGML_METAL_KERNEL_TYPE_SET_ROWS_IQ4_NL].pipeline; break; + default: GGML_ABORT("not implemented"); + } + + const int32_t nk0 = ne0/ggml_blck_size(dst->type); + + int nth = 32; // SIMD width + + while (nth < nk0 && nth < (int) pipeline.maxTotalThreadsPerThreadgroup) { + nth *= 2; + } + + int nrptg = 1; + if (nth > nk0) { + nrptg = (nth + nk0 - 1)/nk0; + nth = nk0; + + if (nrptg*nth > (int) pipeline.maxTotalThreadsPerThreadgroup) { + nrptg--; + } + } + + nth = MIN(nth, nk0); + + ggml_metal_kargs_set_rows args = { + /*.nk0 =*/ nk0, + /*.ne01 =*/ ne01, + /*.nb01 =*/ nb01, + /*.nb02 =*/ nb02, + /*.nb03 =*/ nb03, + /*.ne11 =*/ ne11, + /*.ne12 =*/ ne12, + /*.nb10 =*/ nb10, + /*.nb11 =*/ nb11, + /*.nb12 =*/ nb12, + /*.nb1 =*/ nb1, + /*.nb2 =*/ nb2, + /*.nb3 =*/ nb3, + }; + + [encoder setComputePipelineState:pipeline]; + [encoder setBytes:&args length:sizeof(args) atIndex:0]; + [encoder setBuffer:id_src0 offset:offs_src0 atIndex:1]; + [encoder 
setBuffer:id_src1 offset:offs_src1 atIndex:2]; + [encoder setBuffer:id_dst offset:offs_dst atIndex:3]; + + [encoder dispatchThreadgroups:MTLSizeMake((ne01 + nrptg - 1)/nrptg, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, nrptg, 1)]; + } break; case GGML_OP_RMS_NORM: { GGML_ASSERT(ne00 % 4 == 0); @@ -3733,6 +4118,7 @@ static bool ggml_metal_encode_node( nth *= 2; } + nth = MIN(nth, (int) pipeline.maxTotalThreadsPerThreadgroup); nth = MIN(nth, ne00/4); ggml_metal_kargs_rms_norm args = { @@ -3769,6 +4155,7 @@ static bool ggml_metal_encode_node( nth *= 2; } + nth = MIN(nth, (int) pipeline.maxTotalThreadsPerThreadgroup); nth = MIN(nth, ne00/4); ggml_metal_kargs_l2_norm args = { @@ -3841,6 +4228,7 @@ static bool ggml_metal_encode_node( nth *= 2; } + nth = MIN(nth, (int) pipeline.maxTotalThreadsPerThreadgroup); nth = MIN(nth, ne00/4); ggml_metal_kargs_norm args = { @@ -4734,7 +5122,11 @@ static bool ggml_metal_encode_node( /*.nb21 =*/ nb21, /*.nb22 =*/ nb22, /*.nb23 =*/ nb23, + /*.ne32 =*/ ne32, + /*.ne33 =*/ ne33, /*.nb31 =*/ nb31, + /*.nb32 =*/ nb32, + /*.nb33 =*/ nb33, /*.ne1 =*/ ne1, /*.ne2 =*/ ne2, /*.scale =*/ scale, @@ -4927,8 +5319,39 @@ static bool ggml_metal_encode_node( default: GGML_ABORT("not implemented"); } + GGML_ASSERT(ne00 % ggml_blck_size(src0->type) == 0); + + // TODO: support + //const int32_t nk00 = ne00/ggml_blck_size(dst->type); + const int32_t nk00 = ne00; + + int nth = 32; // SIMD width + + while (nth < nk00 && nth < (int) pipeline.maxTotalThreadsPerThreadgroup) { + nth *= 2; + } + + nth = MIN(nth, (int) pipeline.maxTotalThreadsPerThreadgroup); + + // when rows are small, we can batch them together in a single threadgroup + int nrptg = 1; + + // TODO: relax this constraint in the future + if (ggml_blck_size(src0->type) == 1 && ggml_blck_size(dst->type) == 1) { + if (nth > nk00) { + nrptg = (nth + nk00 - 1)/nk00; + nth = nk00; + + if (nrptg*nth > (int) pipeline.maxTotalThreadsPerThreadgroup) { + nrptg--; + } + } + } + + nth = MIN(nth, nk00); 
+ ggml_metal_kargs_cpy args = { - /*.ne00 =*/ ne00, + /*.ne00 =*/ nk00, /*.ne01 =*/ ne01, /*.ne02 =*/ ne02, /*.ne03 =*/ ne03, @@ -4951,11 +5374,7 @@ static bool ggml_metal_encode_node( [encoder setBuffer:id_src0 offset:offs_src0 atIndex:1]; [encoder setBuffer:id_dst offset:offs_dst atIndex:2]; - GGML_ASSERT(ne00 % ggml_blck_size(src0->type) == 0); - int nth = MIN(1024, ne00/ggml_blck_size(src0->type)); - - [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; - + [encoder dispatchThreadgroups:MTLSizeMake((ne01 + nrptg - 1)/nrptg, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, nrptg, 1)]; } break; case GGML_OP_SET: { @@ -5261,7 +5680,6 @@ static void ggml_backend_metal_buffer_free_buffer(ggml_backend_buffer_t buffer) } ggml_backend_metal_buffer_rset_free(ctx); - ggml_backend_metal_device_rel(buffer->buft->device->context); if (ctx->owned) { #if TARGET_OS_OSX @@ -5370,7 +5788,10 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba } struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)buft->device->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + + GGML_ASSERT(ctx_dev->mtl_device != nil); + + id device = ctx_dev->mtl_device; ctx->all_data = ggml_metal_host_malloc(size_aligned); ctx->all_size = size_aligned; @@ -5393,14 +5814,12 @@ static ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba if (size_aligned > 0 && (ctx->all_data == NULL || ctx->buffers[0].metal == nil)) { GGML_LOG_ERROR("%s: error: failed to allocate buffer, size = %8.2f MiB\n", __func__, size_aligned / 1024.0 / 1024.0); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } @@ -5411,17 +5830,14 @@ static 
ggml_backend_buffer_t ggml_backend_metal_buffer_type_alloc_buffer(ggml_ba static size_t ggml_backend_metal_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { return 32; + GGML_UNUSED(buft); } static size_t ggml_backend_metal_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { - id device = ggml_backend_metal_device_acq(buft->device->context); - const size_t max_size = device.maxBufferLength; - ggml_backend_metal_device_rel(buft->device->context); + const size_t max_size = ((struct ggml_backend_metal_device_context *)buft->device->context)->max_size; return max_size; - - GGML_UNUSED(buft); } static bool ggml_backend_metal_buffer_type_is_host(ggml_backend_buffer_type_t buft) { @@ -5494,7 +5910,10 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz } struct ggml_backend_metal_device_context * ctx_dev = &g_ggml_ctx_dev_main; - id device = ggml_backend_metal_device_acq(ctx_dev); + + GGML_ASSERT(ctx_dev->mtl_device != nil); + + id device = ctx_dev->mtl_device; // the buffer fits into the max buffer size allowed by the device if (size_aligned <= device.maxBufferLength) { @@ -5550,7 +5969,6 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", __func__); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } @@ -5566,10 +5984,8 @@ ggml_backend_buffer_t ggml_backend_metal_buffer_from_ptr(void * data, size_t siz } static void ggml_backend_metal_free(ggml_backend_t backend) { - struct ggml_backend_metal_context * ctx = backend->context; - struct ggml_backend_metal_device_context * ctx_dev = backend->device->context; + struct ggml_backend_metal_context * ctx = backend->context; - ggml_backend_metal_device_rel(ctx_dev); ggml_metal_free(ctx); free(backend); @@ -5709,6 +6125,8 @@ bool ggml_backend_metal_supports_family(ggml_backend_t backend, int family) { 
struct ggml_backend_metal_device_context * ctx_dev = backend->device->context; + GGML_ASSERT(ctx_dev->mtl_device != nil); + return [ctx_dev->mtl_device supportsFamily:(MTLGPUFamilyApple1 + family - 1)]; } @@ -5728,10 +6146,7 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) { } static const char * ggml_backend_metal_device_get_description(ggml_backend_dev_t dev) { - // acq/rel just to populate ctx->name in case it hasn't been done yet struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context; - ggml_backend_metal_device_acq(ctx_dev); - ggml_backend_metal_device_rel(ctx_dev); return ctx_dev->name; } @@ -5739,12 +6154,10 @@ void ggml_backend_metal_capture_next_compute(ggml_backend_t backend) { static void ggml_backend_metal_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { if (@available(macOS 10.12, iOS 16.0, *)) { struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + id device = ctx_dev->mtl_device; *total = device.recommendedMaxWorkingSetSize; *free = *total - device.currentAllocatedSize; - - ggml_backend_metal_device_rel(ctx_dev); } else { *free = 1; *total = 1; @@ -5822,7 +6235,10 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back } struct ggml_backend_metal_device_context * ctx_dev = (struct ggml_backend_metal_device_context *)dev->context; - id device = ggml_backend_metal_device_acq(ctx_dev); + + GGML_ASSERT(ctx_dev->mtl_device != nil); + + id device = ctx_dev->mtl_device; // the buffer fits into the max buffer size allowed by the device if (size_aligned <= device.maxBufferLength) { @@ -5878,7 +6294,6 @@ static ggml_backend_buffer_t ggml_backend_metal_device_buffer_from_ptr(ggml_back if (!ggml_backend_metal_buffer_rset_init(ctx, ctx_dev, device)) { GGML_LOG_ERROR("%s: error: failed to initialize residency set\n", 
__func__); free(ctx); - ggml_backend_metal_device_rel(ctx_dev); return NULL; } @@ -5892,8 +6307,9 @@ static bool ggml_backend_metal_device_supports_op(ggml_backend_dev_t dev, const } static bool ggml_backend_metal_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { - return buft->iface.get_name == ggml_backend_metal_buffer_type_get_name || - buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name; + return + buft->iface.get_name == ggml_backend_metal_buffer_type_get_name || + buft->iface.get_name == ggml_backend_metal_buffer_from_ptr_type_get_name; GGML_UNUSED(dev); } @@ -5978,8 +6394,19 @@ static ggml_backend_dev_t ggml_backend_metal_reg_device_get(ggml_backend_reg_t r /* .get_proc_address = */ ggml_backend_metal_get_proc_address, }; +// called upon program exit +static void ggml_metal_cleanup(void) { + ggml_backend_metal_device_rel(&g_ggml_ctx_dev_main); +} + +// TODO: make thread-safe ggml_backend_reg_t ggml_backend_metal_reg(void) { - // TODO: make this thread-safe somehow? 
+ ggml_backend_metal_device_acq(&g_ggml_ctx_dev_main); + + // register cleanup callback + // TODO: not ideal, but not sure if there is a better way to do this in Objective-C + atexit(ggml_metal_cleanup); + { g_ggml_backend_metal_reg = (struct ggml_backend_reg) { /* .api_version = */ GGML_BACKEND_API_VERSION, diff --git a/ggml/src/ggml-metal/ggml-metal.metal b/ggml/src/ggml-metal/ggml-metal.metal index 5d7760217f826..13235e2885241 100644 --- a/ggml/src/ggml-metal/ggml-metal.metal +++ b/ggml/src/ggml-metal/ggml-metal.metal @@ -35,6 +35,17 @@ constexpr constant static float kvalues_iq4nl_f[16] = { -127.f, -104.f, -83.f, -65.f, -49.f, -35.f, -22.f, -10.f, 1.f, 13.f, 25.f, 38.f, 53.f, 69.f, 89.f, 113.f }; +static inline int best_index_int8(int n, constant float * val, float x) { + if (x <= val[0]) return 0; + if (x >= val[n-1]) return n-1; + int ml = 0, mu = n-1; + while (mu-ml > 1) { + int mav = (ml+mu)/2; + if (x < val[mav]) mu = mav; else ml = mav; + } + return x - val[mu-1] < val[mu] - x ? mu-1 : mu; +} + // NOTE: this is not dequantizing - we are simply fitting the template template void dequantize_f32(device const float4x4 * src, short il, thread type4x4 & reg) { @@ -97,6 +108,178 @@ void dequantize_q4_0_t4(device const block_q4_0 * xb, short il, thread type4 & r } } +void quantize_q4_0(device const float * src, device block_q4_0 & dst) { +#pragma METAL fp math_mode(safe) + float amax = 0.0f; // absolute max + float max = 0.0f; + + for (int j = 0; j < QK4_0; j++) { + const float v = src[j]; + if (amax < fabs(v)) { + amax = fabs(v); + max = v; + } + } + + const float d = max / -8; + const float id = d ? 
1.0f/d : 0.0f; + + dst.d = d; + + for (int j = 0; j < QK4_0/2; ++j) { + const float x0 = src[0 + j]*id; + const float x1 = src[QK4_0/2 + j]*id; + + const uint8_t xi0 = MIN(15, (int8_t)(x0 + 8.5f)); + const uint8_t xi1 = MIN(15, (int8_t)(x1 + 8.5f)); + + dst.qs[j] = xi0; + dst.qs[j] |= xi1 << 4; + } +} + +void quantize_q4_1(device const float * src, device block_q4_1 & dst) { +#pragma METAL fp math_mode(safe) + float min = FLT_MAX; + float max = -FLT_MAX; + + for (int j = 0; j < QK4_1; j++) { + const float v = src[j]; + if (min > v) min = v; + if (max < v) max = v; + } + + const float d = (max - min) / ((1 << 4) - 1); + const float id = d ? 1.0f/d : 0.0f; + + dst.d = d; + dst.m = min; + + for (int j = 0; j < QK4_1/2; ++j) { + const float x0 = (src[0 + j] - min)*id; + const float x1 = (src[QK4_1/2 + j] - min)*id; + + const uint8_t xi0 = MIN(15, (int8_t)(x0 + 0.5f)); + const uint8_t xi1 = MIN(15, (int8_t)(x1 + 0.5f)); + + dst.qs[j] = xi0; + dst.qs[j] |= xi1 << 4; + } +} + +void quantize_q5_0(device const float * src, device block_q5_0 & dst) { +#pragma METAL fp math_mode(safe) + float amax = 0.0f; // absolute max + float max = 0.0f; + + for (int j = 0; j < QK5_0; j++) { + const float v = src[j]; + if (amax < fabs(v)) { + amax = fabs(v); + max = v; + } + } + + const float d = max / -16; + const float id = d ? 
1.0f/d : 0.0f; + + dst.d = d; + + uint32_t qh = 0; + for (int j = 0; j < QK5_0/2; ++j) { + const float x0 = src[0 + j]*id; + const float x1 = src[QK5_0/2 + j]*id; + + const uint8_t xi0 = MIN(31, (int8_t)(x0 + 16.5f)); + const uint8_t xi1 = MIN(31, (int8_t)(x1 + 16.5f)); + + dst.qs[j] = (xi0 & 0xf) | ((xi1 & 0xf) << 4); + qh |= ((xi0 & 0x10u) >> 4) << (j + 0); + qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_0/2); + } + + thread const uint8_t * qh8 = (thread const uint8_t *)&qh; + + for (int j = 0; j < 4; ++j) { + dst.qh[j] = qh8[j]; + } +} + +void quantize_q5_1(device const float * src, device block_q5_1 & dst) { +#pragma METAL fp math_mode(safe) + float max = src[0]; + float min = src[0]; + + for (int j = 1; j < QK5_1; j++) { + const float v = src[j]; + min = v < min ? v : min; + max = v > max ? v : max; + } + + const float d = (max - min) / 31; + const float id = d ? 1.0f/d : 0.0f; + + dst.d = d; + dst.m = min; + + uint32_t qh = 0; + for (int j = 0; j < QK5_1/2; ++j) { + const float x0 = (src[0 + j] - min)*id; + const float x1 = (src[QK5_1/2 + j] - min)*id; + + const uint8_t xi0 = (uint8_t)(x0 + 0.5f); + const uint8_t xi1 = (uint8_t)(x1 + 0.5f); + + dst.qs[j] = (xi0 & 0xf) | ((xi1 & 0xf) << 4); + qh |= ((xi0 & 0x10u) >> 4) << (j + 0); + qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_1/2); + } + + thread const uint8_t * qh8 = (thread const uint8_t *)&qh; + + for (int j = 0; j < 4; ++j) { + dst.qh[j] = qh8[j]; + } +} + +void quantize_iq4_nl(device const float * src, device block_iq4_nl & dst) { +#pragma METAL fp math_mode(safe) + float amax = 0.0f; // absolute max + float max = 0.0f; + + for (int j = 0; j < QK4_NL; j++) { + const float v = src[j]; + if (amax < fabs(v)) { + amax = fabs(v); + max = v; + } + } + + const float d = max / kvalues_iq4nl_f[0]; + const float id = d ? 
1.0f/d : 0.0f; + + float sumqx = 0, sumq2 = 0; + for (int j = 0; j < QK4_NL/2; ++j) { + const float x0 = src[0 + j]*id; + const float x1 = src[QK4_NL/2 + j]*id; + + const uint8_t xi0 = best_index_int8(16, kvalues_iq4nl_f, x0); + const uint8_t xi1 = best_index_int8(16, kvalues_iq4nl_f, x1); + + dst.qs[j] = xi0 | (xi1 << 4); + + const float v0 = kvalues_iq4nl_f[xi0]; + const float v1 = kvalues_iq4nl_f[xi1]; + const float w0 = src[0 + j]*src[0 + j]; + const float w1 = src[QK4_NL/2 + j]*src[QK4_NL/2 + j]; + sumqx += w0*v0*src[j] + w1*v1*src[QK4_NL/2 + j]; + sumq2 += w0*v0*v0 + w1*v1*v1; + + } + + dst.d = sumq2 > 0 ? sumqx/sumq2 : d; +} + template void dequantize_q4_1(device const block_q4_1 * xb, short il, thread type4x4 & reg) { device const uint16_t * qs = ((device const uint16_t *)xb + 2); @@ -279,6 +462,27 @@ void dequantize_q8_0_t4(device const block_q8_0 *xb, short il, thread type4 & re } } +void quantize_q8_0(device const float * src, device block_q8_0 & dst) { +#pragma METAL fp math_mode(safe) + float amax = 0.0f; // absolute max + + for (int j = 0; j < QK8_0; j++) { + const float v = src[j]; + amax = MAX(amax, fabs(v)); + } + + const float d = amax / ((1 << 7) - 1); + const float id = d ? 
1.0f/d : 0.0f; + + dst.d = d; + + for (int j = 0; j < QK8_0; ++j) { + const float x0 = src[j]*id; + + dst.qs[j] = round(x0); + } +} + template void dequantize_q2_K(device const block_q2_K *xb, short il, thread type4x4 & reg) { const float d = xb->d; @@ -810,16 +1014,18 @@ kernel void kernel_scale( device const float * src0, device float * dst, constant float & scale, + constant float & bias, uint tpig[[thread_position_in_grid]]) { - dst[tpig] = src0[tpig] * scale; + dst[tpig] = src0[tpig] * scale + bias; } kernel void kernel_scale_4( device const float4 * src0, device float4 * dst, constant float & scale, + constant float & bias, uint tpig[[thread_position_in_grid]]) { - dst[tpig] = src0[tpig] * scale; + dst[tpig] = src0[tpig] * scale + bias; } kernel void kernel_clamp( @@ -993,31 +1199,214 @@ kernel void kernel_neg( dst[tpig] = -src0[tpig]; } -kernel void kernel_sum_rows( +kernel void kernel_abs( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + dst[tpig] = fabs(src0[tpig]); +} + +kernel void kernel_sgn( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + device const float & x = src0[tpig]; + dst[tpig] = (x > 0.0f) ? 1.0f : ((x < 0.0f) ? -1.0f : 0.0f); +} + +kernel void kernel_step( device const float * src0, device float * dst, + uint tpig[[thread_position_in_grid]]) { + dst[tpig] = src0[tpig] > 0.0f ? 
1.0f : 0.0f; +} + +kernel void kernel_hardswish( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + device const float & x = src0[tpig]; + dst[tpig] = x * fmin(1.0f, fmax(0.0f, (x + 3.0f) / 6.0f)); +} + +kernel void kernel_hardsigmoid( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + device const float & x = src0[tpig]; + dst[tpig] = fmin(1.0f, fmax(0.0f, (x + 3.0f) / 6.0f)); +} + +kernel void kernel_exp( + device const float * src0, + device float * dst, + uint tpig[[thread_position_in_grid]]) { + dst[tpig] = exp(src0[tpig]); +} + +kernel void kernel_reglu( + device const char * src0, + device const char * src1, + device char * dst, + constant ggml_metal_kargs_glu & args, + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint ntg[[threads_per_threadgroup]]) { + device const float * src0_row = (device const float *) ((device const char *) src0 + tgpig*args.nb01) + args.i00; + device const float * src1_row = (device const float *) ((device const char *) src1 + tgpig*args.nb11) + args.i10; + device float * dst_row = (device float *) ((device char *) dst + tgpig*args.nb1); + + for (int i0 = tpitg; i0 < args.ne0; i0 += ntg) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + dst_row[i0] = x0*x1*(x0 > 0.0f); + } +} + +kernel void kernel_geglu( + device const char * src0, + device const char * src1, + device char * dst, + constant ggml_metal_kargs_glu & args, + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint ntg[[threads_per_threadgroup]]) { + device const float * src0_row = (device const float *) ((device const char *) src0 + tgpig*args.nb01) + args.i00; + device const float * src1_row = (device const float *) ((device const char *) src1 + tgpig*args.nb11) + args.i10; + device float * dst_row = (device float *) ((device char *) dst + tgpig*args.nb1); + + for (int i0 
= tpitg; i0 < args.ne0; i0 += ntg) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float gelu = 0.5f*x0*(1.0f + precise::tanh(SQRT_2_OVER_PI*x0*(1.0f + GELU_COEF_A*x0*x0))); + + dst_row[i0] = gelu*x1; + } +} + +kernel void kernel_swiglu( + device const char * src0, + device const char * src1, + device char * dst, + constant ggml_metal_kargs_glu & args, + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint ntg[[threads_per_threadgroup]]) { + device const float * src0_row = (device const float *) ((device const char *) src0 + tgpig*args.nb01) + args.i00; + device const float * src1_row = (device const float *) ((device const char *) src1 + tgpig*args.nb11) + args.i10; + device float * dst_row = (device float *) ((device char *) dst + tgpig*args.nb1); + + for (int i0 = tpitg; i0 < args.ne0; i0 += ntg) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float silu = x0 / (1.0f + exp(-x0)); + + dst_row[i0] = silu*x1; + } +} + +kernel void kernel_geglu_erf( + device const char * src0, + device const char * src1, + device char * dst, + constant ggml_metal_kargs_glu & args, + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint ntg[[threads_per_threadgroup]]) { + device const float * src0_row = (device const float *) ((device const char *) src0 + tgpig*args.nb01) + args.i00; + device const float * src1_row = (device const float *) ((device const char *) src1 + tgpig*args.nb11) + args.i10; + device float * dst_row = (device float *) ((device char *) dst + tgpig*args.nb1); + + for (int i0 = tpitg; i0 < args.ne0; i0 += ntg) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float gelu_erf = 0.5f*x0*(1.0f+erf_approx(x0*SQRT_2_INV)); + + dst_row[i0] = gelu_erf*x1; + } +} + +kernel void kernel_geglu_quick( + device const char * src0, + device const char * src1, + device char * dst, + constant 
ggml_metal_kargs_glu & args, + uint tgpig[[threadgroup_position_in_grid]], + uint tpitg[[thread_position_in_threadgroup]], + uint ntg[[threads_per_threadgroup]]) { + device const float * src0_row = (device const float *) ((device const char *) src0 + tgpig*args.nb01) + args.i00; + device const float * src1_row = (device const float *) ((device const char *) src1 + tgpig*args.nb11) + args.i10; + device float * dst_row = (device float *) ((device char *) dst + tgpig*args.nb1); + + for (int i0 = tpitg; i0 < args.ne0; i0 += ntg) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float gelu_quick = x0*(1.0f/(1.0f+exp(GELU_QUICK_COEF*x0))); + + dst_row[i0] = gelu_quick*x1; + } +} + +template +kernel void kernel_sum_rows( constant ggml_metal_kargs_sum_rows & args, - uint3 tpig[[thread_position_in_grid]]) { - int64_t i3 = tpig.z; - int64_t i2 = tpig.y; - int64_t i1 = tpig.x; + device const float * src0, + device float * dst, + threadgroup float * shmem_f32 [[threadgroup(0)]], + uint3 tgpig[[threadgroup_position_in_grid]], + ushort3 tpitg[[thread_position_in_threadgroup]], + ushort sgitg[[simdgroup_index_in_threadgroup]], + ushort tiisg[[thread_index_in_simdgroup]], + ushort3 ntg[[threads_per_threadgroup]]) { + int64_t i3 = tgpig.z; + int64_t i2 = tgpig.y; + int64_t i1 = tgpig.x; if (i3 >= args.ne03 || i2 >= args.ne02 || i1 >= args.ne01) { return; } + if (sgitg == 0) { + shmem_f32[tiisg] = 0.0f; + } + device const float * src_row = (device const float *) ((device const char *) src0 + i1*args.nb01 + i2*args.nb02 + i3*args.nb03); device float * dst_row = (device float *) ((device char *) dst + i1*args.nb1 + i2*args.nb2 + i3*args.nb3); - float row_sum = 0; + float sumf = 0; + + for (int64_t i0 = tpitg.x; i0 < args.ne00; i0 += ntg.x) { + sumf += src_row[i0]; + } + + sumf = simd_sum(sumf); - for (int64_t i0 = 0; i0 < args.ne00; i0++) { - row_sum += src_row[i0]; + threadgroup_barrier(mem_flags::mem_threadgroup); + + if (tiisg == 0) { + shmem_f32[sgitg] 
= sumf; } - dst_row[0] = row_sum; + threadgroup_barrier(mem_flags::mem_threadgroup); + + sumf = shmem_f32[tiisg]; + sumf = simd_sum(sumf); + + if (tpitg.x == 0) { + dst_row[0] = norm ? sumf / args.ne00 : sumf; + } } +typedef decltype(kernel_sum_rows) kernel_sum_rows_t; + +template [[host_name("kernel_sum_rows")]] kernel kernel_sum_rows_t kernel_sum_rows; +template [[host_name("kernel_mean")]] kernel kernel_sum_rows_t kernel_sum_rows; + template kernel void kernel_soft_max( device const char * src0, @@ -1025,24 +1414,28 @@ kernel void kernel_soft_max( device char * dst, constant ggml_metal_kargs_soft_max & args, threadgroup float * buf [[threadgroup(0)]], - uint tgpig[[threadgroup_position_in_grid]], - uint tpitg[[thread_position_in_threadgroup]], + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tpitg[[thread_position_in_threadgroup]], uint sgitg[[simdgroup_index_in_threadgroup]], uint tiisg[[thread_index_in_simdgroup]], - uint ntg[[threads_per_threadgroup]]) { - const int64_t i03 = (tgpig) / (args.ne02*args.ne01); - const int64_t i02 = (tgpig - i03*args.ne02*args.ne01) / args.ne01; - const int64_t i01 = (tgpig - i03*args.ne02*args.ne01 - i02*args.ne01); + uint3 tptg[[threads_per_threadgroup]]) { + const int32_t i03 = tgpig.z; + const int32_t i02 = tgpig.y; + const int32_t i01 = tgpig.x; + + const int32_t i13 = i03%args.ne13; + const int32_t i12 = i02%args.ne12; + const int32_t i11 = i01; - device const float * psrc0 = (device const float *) src0 + (i03*args.ne02*args.ne01*args.ne00 + i02*args.ne01*args.ne00 + i01*args.ne00); - device const T * pmask = src1 != src0 ? (device const T *) src1 + i01*args.ne00 : nullptr; - device float * pdst = (device float *) dst + (i03*args.ne02*args.ne01*args.ne00 + i02*args.ne01*args.ne00 + i01*args.ne00); + device const float * psrc0 = (device const float *) (src0 + i01*args.nb01 + i02*args.nb02 + i03*args.nb03); + device const T * pmask = src1 != src0 ? 
(device const T * ) (src1 + i11*args.nb11 + i12*args.nb12 + i13*args.nb13) : nullptr; + device float * pdst = (device float *) (dst + i01*args.nb1 + i02*args.nb2 + i03*args.nb3); float slope = 1.0f; // ALiBi if (args.max_bias > 0.0f) { - const int64_t h = i02; + const int32_t h = i02; const float base = h < args.n_head_log2 ? args.m0 : args.m1; const int exp = h < args.n_head_log2 ? h + 1 : 2*(h - args.n_head_log2) + 1; @@ -1053,13 +1446,13 @@ kernel void kernel_soft_max( // parallel max float lmax = -INFINITY; - for (int i00 = tpitg; i00 < args.ne00; i00 += ntg) { + for (int i00 = tpitg.x; i00 < args.ne00; i00 += tptg.x) { lmax = MAX(lmax, psrc0[i00]*args.scale + (pmask ? slope*pmask[i00] : 0.0f)); } // find the max value in the block float max_val = simd_max(lmax); - if (ntg > N_SIMDWIDTH) { + if (tptg.x > N_SIMDWIDTH) { if (sgitg == 0) { buf[tiisg] = -INFINITY; } @@ -1078,7 +1471,7 @@ kernel void kernel_soft_max( // parallel sum float lsum = 0.0f; - for (int i00 = tpitg; i00 < args.ne00; i00 += ntg) { + for (int i00 = tpitg.x; i00 < args.ne00; i00 += tptg.x) { const float exp_psrc0 = exp((psrc0[i00]*args.scale + (pmask ? 
slope*pmask[i00] : 0.0f)) - max_val); lsum += exp_psrc0; pdst[i00] = exp_psrc0; @@ -1090,7 +1483,7 @@ kernel void kernel_soft_max( float sum = simd_sum(lsum); - if (ntg > N_SIMDWIDTH) { + if (tptg.x > N_SIMDWIDTH) { if (sgitg == 0) { buf[tiisg] = 0.0f; } @@ -1109,7 +1502,7 @@ kernel void kernel_soft_max( const float inv_sum = 1.0f/sum; - for (int i00 = tpitg; i00 < args.ne00; i00 += ntg) { + for (int i00 = tpitg.x; i00 < args.ne00; i00 += tptg.x) { pdst[i00] *= inv_sum; } } @@ -1121,23 +1514,27 @@ kernel void kernel_soft_max_4( device char * dst, constant ggml_metal_kargs_soft_max & args, threadgroup float * buf [[threadgroup(0)]], - uint tgpig[[threadgroup_position_in_grid]], - uint tpitg[[thread_position_in_threadgroup]], + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tpitg[[thread_position_in_threadgroup]], uint sgitg[[simdgroup_index_in_threadgroup]], uint tiisg[[thread_index_in_simdgroup]], - uint ntg[[threads_per_threadgroup]]) { - const int64_t i03 = (tgpig) / (args.ne02*args.ne01); - const int64_t i02 = (tgpig - i03*args.ne02*args.ne01) / args.ne01; - const int64_t i01 = (tgpig - i03*args.ne02*args.ne01 - i02*args.ne01); + uint3 tptg[[threads_per_threadgroup]]) { + const int32_t i03 = tgpig.z; + const int32_t i02 = tgpig.y; + const int32_t i01 = tgpig.x; - device const float4 * psrc4 = (device const float4 *) src0 + (i03*args.ne02*args.ne01*args.ne00 + i02*args.ne01*args.ne00 + i01*args.ne00)/4; - device const T * pmask = src1 != src0 ? (device const T *) src1 + i01*args.ne00/4 : nullptr; - device float4 * pdst4 = (device float4 *) dst + (i03*args.ne02*args.ne01*args.ne00 + i02*args.ne01*args.ne00 + i01*args.ne00)/4; + const int32_t i13 = i03%args.ne13; + const int32_t i12 = i02%args.ne12; + const int32_t i11 = i01; + + device const float4 * psrc4 = (device const float4 *) (src0 + i01*args.nb01 + i02*args.nb02 + i03*args.nb03); + device const T * pmask = src1 != src0 ? 
(device const T * ) (src1 + i11*args.nb11 + i12*args.nb12 + i13*args.nb13) : nullptr; + device float4 * pdst4 = (device float4 *) (dst + i01*args.nb1 + i02*args.nb2 + i03*args.nb3); float slope = 1.0f; if (args.max_bias > 0.0f) { - const int64_t h = i02; + const int32_t h = i02; const float base = h < args.n_head_log2 ? args.m0 : args.m1; const int exp = h < args.n_head_log2 ? h + 1 : 2*(h - args.n_head_log2) + 1; @@ -1148,14 +1545,14 @@ kernel void kernel_soft_max_4( // parallel max float4 lmax4 = -INFINITY; - for (int i00 = tpitg; i00 < args.ne00/4; i00 += ntg) { + for (int i00 = tpitg.x; i00 < args.ne00/4; i00 += tptg.x) { lmax4 = fmax(lmax4, psrc4[i00]*args.scale + (float4)((pmask ? slope*pmask[i00] : 0.0f))); } const float lmax = MAX(MAX(lmax4[0], lmax4[1]), MAX(lmax4[2], lmax4[3])); float max_val = simd_max(lmax); - if (ntg > N_SIMDWIDTH) { + if (tptg.x > N_SIMDWIDTH) { if (sgitg == 0) { buf[tiisg] = -INFINITY; } @@ -1174,7 +1571,7 @@ kernel void kernel_soft_max_4( // parallel sum float4 lsum4 = 0.0f; - for (int i00 = tpitg; i00 < args.ne00/4; i00 += ntg) { + for (int i00 = tpitg.x; i00 < args.ne00/4; i00 += tptg.x) { const float4 exp_psrc4 = exp((psrc4[i00]*args.scale + (float4)((pmask ? 
slope*pmask[i00] : 0.0f))) - max_val); lsum4 += exp_psrc4; pdst4[i00] = exp_psrc4; @@ -1188,7 +1585,7 @@ kernel void kernel_soft_max_4( float sum = simd_sum(lsum); - if (ntg > N_SIMDWIDTH) { + if (tptg.x > N_SIMDWIDTH) { if (sgitg == 0) { buf[tiisg] = 0.0f; } @@ -1207,7 +1604,7 @@ kernel void kernel_soft_max_4( const float inv_sum = 1.0f/sum; - for (int i00 = tpitg; i00 < args.ne00/4; i00 += ntg) { + for (int i00 = tpitg.x; i00 < args.ne00/4; i00 += tptg.x) { pdst4[i00] *= inv_sum; } } @@ -1293,7 +1690,7 @@ kernel void kernel_ssm_conv_f32( x[0] = sumf; } -// ref: ggml.c:ggml_compute_forward_ssm_scan_f32 +// ref: ggml.c:ggml_compute_forward_ssm_scan_f32, Mamba-1 part kernel void kernel_ssm_scan_f32( device const void * src0, device const void * src1, @@ -1301,46 +1698,119 @@ kernel void kernel_ssm_scan_f32( device const void * src3, device const void * src4, device const void * src5, + device const void * src6, device float * dst, constant ggml_metal_kargs_ssm_scan & args, uint3 tgpig[[threadgroup_position_in_grid]], uint3 tpitg[[thread_position_in_threadgroup]], uint3 ntg[[threads_per_threadgroup]]) { - const int64_t ir = tgpig.x; - const int64_t i3 = tgpig.y; + const int64_t i1 = 0; + const int64_t ir = tgpig.x; // current head + const int64_t i3 = tgpig.y; // current seq + + const uint64_t nb00 = sizeof(float); + const uint64_t nb10 = sizeof(float); + const uint64_t nb20 = sizeof(float); const int64_t nc = args.d_state; - // const int64_t nr = args.d_inner; + const int64_t nr = args.d_inner; + const int64_t nh = args.n_head; + const int64_t ng = args.n_group; const int64_t n_t = args.n_seq_tokens; - // const int64_t n_s = args.n_seqs; + + const int64_t s_off = nr * nh * n_t * args.n_seqs * sizeof(float); + + device const int32_t * ids = (device const int32_t *) src6; + + device const float * s0 = (device const float *) ((device const char *) src0 + ir*args.nb02 + ids[i3]*args.nb03); + device float * s = (device float *) ((device char *) dst + ir*args.nb02 + 
i3*args.nb03 + s_off); for (int64_t i2 = 0; i2 < n_t; ++i2) { - device const float * s0 = (device const float *) ((device const char *) src0 + ir*args.nb01 + i3*args.nb02); - device const float * x = (device const float *) ((device const char *) src1 + ir*args.nb10 + i2*args.nb11 + i3*args.nb12); - device const float * dt = (device const float *) ((device const char *) src2 + ir*args.nb20 + i2*args.nb21 + i3*args.nb22); - device const float * A = (device const float *) ((device const char *) src3 + ir*args.nb31); - device const float * B = (device const float *) ((device const char *) src4 + i2*args.nb41 + i3*args.nb42); - device const float * C = (device const float *) ((device const char *) src5 + i2*args.nb51 + i3*args.nb52); - device float * y = (device float *) ((device char *) dst + ir*args.nb10 + i2*args.nb11 + i3*args.nb12); // TODO: do not use src1 strides - device float * s = (device float *) ((device char *) dst + ir*args.nb01 + i3*args.nb02 + args.nb13); - - if (i2 > 0) { - s0 = s; - } - - // i1 == 0 - float dt_soft_plus = dt[0] <= 20.0f ? 
log(1.0f + exp(dt[0])) : dt[0]; - float x_dt = x[0] * dt_soft_plus; + device const float * x = (device const float *) ((device const char *) src1 + i1*nb10 + ir*args.nb11 + i2*args.nb12 + i3*args.nb13); // {dim, nh, nt, ns} + device const float * dt = (device const float *) ((device const char *) src2 + ir*nb20 + i2*args.nb21 + i3*args.nb22); // {nh, nt, ns} + device const float * A = (device const float *) ((device const char *) src3 + ir*args.nb31); // {d_state, nh} + device const float * B = (device const float *) ((device const char *) src4 + (ir & (ng - 1))*args.nb41 + i2*args.nb42 + i3*args.nb43); // {d_state, ng, nt, ns} + device const float * C = (device const float *) ((device const char *) src5 + (ir & (ng - 1))*args.nb51 + i2*args.nb52 + i3*args.nb53); // {d_state, ng, nt, ns} + device float * y = (device float *) ((device char *) dst + (i1 + ir*(nr) + i2*(nh*nr) + i3*(n_t*nh*nr))*nb00); // {dim, nh, nt, ns} + + const float dt_soft_plus = dt[0] <= 20.0f ? log(1.0f + exp(dt[0])) : dt[0]; + const float x_dt = x[0] * dt_soft_plus; float sumf = 0.0f; for (int64_t i0 = 0; i0 < nc; ++i0) { - int64_t i = i0; - float state = (s0[i] * exp(dt_soft_plus * A[i])) + (B[i0] * x_dt); + const int64_t i = i0 + i1*nc; + const float state = (s0[i] * exp(dt_soft_plus * A[i0])) + (B[i0] * x_dt); sumf += state * C[i0]; s[i] = state; } y[0] = sumf; + + // recurse + s0 = s; + } +} + +// ref: ggml.c:ggml_compute_forward_ssm_scan_f32, Mamba-2 part +// TODO: optimize (e.g. 
by parallelizing over d_state) +kernel void kernel_ssm_scan_f32_group( + device const void * src0, + device const void * src1, + device const void * src2, + device const void * src3, + device const void * src4, + device const void * src5, + device const void * src6, + device float * dst, + constant ggml_metal_kargs_ssm_scan & args, + uint3 tgpig[[threadgroup_position_in_grid]], + uint3 tpitg[[thread_position_in_threadgroup]], + uint3 ntg[[threads_per_threadgroup]]) { + const int64_t i1 = tgpig.x; + const int64_t ir = tgpig.y; // current head + const int64_t i3 = tgpig.z; // current seq + + const uint64_t nb00 = sizeof(float); + const uint64_t nb10 = sizeof(float); + const uint64_t nb20 = sizeof(float); + + const int64_t nc = args.d_state; + const int64_t nr = args.d_inner; + const int64_t nh = args.n_head; + const int64_t ng = args.n_group; + const int64_t n_t = args.n_seq_tokens; + + const int64_t s_off = nr * nh * n_t * args.n_seqs * sizeof(float); + + device const int32_t * ids = (device const int32_t *) src6; + + device const float * s0 = (device const float *) ((device const char *) src0 + ir*args.nb02 + ids[i3]*args.nb03); + device float * s = (device float *) ((device char *) dst + ir*args.nb02 + i3*args.nb03 + s_off); + + for (int64_t i2 = 0; i2 < n_t; ++i2) { + device const float * x = (device const float *) ((device const char *) src1 + i1*nb10 + ir*args.nb11 + i2*args.nb12 + i3*args.nb13); // {dim, nh, nt, ns} + device const float * dt = (device const float *) ((device const char *) src2 + ir*nb20 + i2*args.nb21 + i3*args.nb22); // {nh, nt, ns} + device const float * A = (device const float *) ((device const char *) src3 + ir*args.nb31); // {1, nh} + device const float * B = (device const float *) ((device const char *) src4 + (ir & (ng - 1))*args.nb41 + i2*args.nb42 + i3*args.nb43); // {d_state, ng, nt, ns} + device const float * C = (device const float *) ((device const char *) src5 + (ir & (ng - 1))*args.nb51 + i2*args.nb52 + i3*args.nb53); // 
{d_state, ng, nt, ns} + device float * y = (device float *) ((device char *) dst + (i1 + ir*(nr) + i2*(nh*nr) + i3*(n_t*nh*nr))*nb00); // {dim, nh, nt, ns} + + const float dt_soft_plus = dt[0] <= 20.0f ? log(1.0f + exp(dt[0])) : dt[0]; + const float x_dt = x[0] * dt_soft_plus; + const float dA = exp(dt_soft_plus * A[0]); + float sumf = 0.0f; + + for (int64_t i0 = 0; i0 < nc; ++i0) { + const int64_t i = i0 + i1*nc; + const float state = (s0[i] * dA) + (B[i0] * x_dt); + sumf += state * C[i0]; + s[i] = state; + } + + y[0] = sumf; + + // recurse + s0 = s; } } @@ -2502,6 +2972,70 @@ template [[host_name("kernel_mul_mv_bf16_f32")]] kernel mul_mv_t kernel_mul_mv< template [[host_name("kernel_mul_mv_bf16_bf16")]] kernel mul_mv_t kernel_mul_mv; #endif +template +void kernel_mul_mv_c4_impl( + args_t args, + device const char * src0, + device const char * src1, + device char * dst, + uint3 tgpig, + ushort tiisg) { + const int r0 = tgpig.x*32 + tiisg; + const int rb = tgpig.y*N_MV_T_T; + const int im = tgpig.z; + + if (r0 >= args.ne01) { + return; + } + + const uint i12 = im%args.ne12; + const uint i13 = im/args.ne12; + + const uint64_t offset0 = r0*args.nb01 + (i12/args.r2)*args.nb02 + (i13/args.r3)*args.nb03; + + device const T04 * x = (device const T04 *) (src0 + offset0); + + device float * dst_f32 = (device float *) dst + (uint64_t)im*args.ne0*args.ne1; + + for (int row = 0; row < N_MV_T_T; ++row) { + int r1 = rb + row; + if (r1 >= args.ne11) { + break; + } + + const uint64_t offset1 = r1*args.nb11 + (i12 )*args.nb12 + (i13 )*args.nb13; + + device const T14 * y = (device const T14 *) (src1 + offset1); + + dst_f32[(uint64_t)r1*args.ne0 + r0] = dot((float4) x[0], (float4) y[0]); + } +} + +template +kernel void kernel_mul_mv_c4( + constant ggml_metal_kargs_mul_mv & args, + device const char * src0, + device const char * src1, + device char * dst, + uint3 tgpig[[threadgroup_position_in_grid]], + ushort tiisg[[thread_index_in_simdgroup]]) { + kernel_mul_mv_c4_impl( + args, + 
src0, + src1, + dst, + tgpig, + tiisg); +} + +typedef decltype(kernel_mul_mv_c4) mul_mv_c4_t; + +template [[host_name("kernel_mul_mv_f32_f32_c4")]] kernel mul_mv_c4_t kernel_mul_mv_c4; +template [[host_name("kernel_mul_mv_f16_f32_c4")]] kernel mul_mv_c4_t kernel_mul_mv_c4; +#if defined(GGML_METAL_USE_BF16) +template [[host_name("kernel_mul_mv_bf16_f32_c4")]] kernel mul_mv_c4_t kernel_mul_mv_c4; +#endif + template kernel void kernel_mul_mv_1row( constant ggml_metal_kargs_mul_mv & args, @@ -3417,7 +3951,7 @@ kernel void kernel_flash_attn_ext( // load the mask in shared memory #pragma unroll(Q) for (short j = 0; j < Q; ++j) { - device const half * pm = (device const half *) ((device const char *) mask + (iq1 + j)*args.nb31); + device const half * pm = (device const half *) ((device const char *) mask + (iq1 + j)*args.nb31 + (iq2%args.ne32)*args.nb32 + (iq3%args.ne33)*args.nb33); const float m = pm[ic + tiisg]; @@ -3903,7 +4437,7 @@ kernel void kernel_flash_attn_ext_vec( const bool has_mask = mask != q; // pointer to the mask - device const half * pm = (device const half *) (mask + iq1*args.nb31); + device const half * pm = (device const half *) (mask + iq1*args.nb31 + (iq2%args.ne32)*args.nb32 + (iq3%args.ne33)*args.nb33); float slope = 1.0f; @@ -4276,11 +4810,16 @@ kernel void kernel_cpy( device const char * src0, device char * dst, uint3 tgpig[[threadgroup_position_in_grid]], + uint tiitg[[thread_index_in_threadgroup]], ushort3 tpitg[[thread_position_in_threadgroup]], - ushort3 ntg[[threads_per_threadgroup]]) { + ushort3 tptg[[threads_per_threadgroup]]) { const int i03 = tgpig[2]; const int i02 = tgpig[1]; - const int i01 = tgpig[0]; + const int i01 = tgpig[0]*tptg.y + tiitg/tptg.x; + + if (i01 >= args.ne01) { + return; + } const int64_t n = i03*args.ne02*args.ne01*args.ne00 + i02*args.ne01*args.ne00 + i01*args.ne00; @@ -4291,7 +4830,7 @@ kernel void kernel_cpy( device T1 * dst_data = (device T1 *) (dst + i3*args.nb3 + i2*args.nb2 + i1*args.nb1 + i0*args.nb0); - for 
(int64_t i00 = tpitg.x; i00 < args.ne00; i00 += ntg.x) { + for (int64_t i00 = tiitg%tptg.x; i00 < args.ne00; i00 += tptg.x) { device const T0 * src = (device T0 *)(src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); dst_data[i00] = (T1) src[0]; } @@ -4311,6 +4850,7 @@ template [[host_name("kernel_cpy_bf16_f32")]] kernel kernel_cpy_t kernel_cpy; #endif +// TODO: templetify these kernels kernel void kernel_cpy_f32_q8_0( constant ggml_metal_kargs_cpy & args, device const char * src0, @@ -4334,23 +4874,7 @@ kernel void kernel_cpy_f32_q8_0( for (int64_t i00 = tpitg.x*QK8_0; i00 < args.ne00; i00 += ntg.x*QK8_0) { device const float * src = (device float *)(src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); - float amax = 0.0f; // absolute max - - for (int j = 0; j < QK8_0; j++) { - const float v = src[j]; - amax = MAX(amax, fabs(v)); - } - - const float d = amax / ((1 << 7) - 1); - const float id = d ? 1.0f/d : 0.0f; - - dst_data[i00/QK8_0].d = d; - - for (int j = 0; j < QK8_0; ++j) { - const float x0 = src[j]*id; - - dst_data[i00/QK8_0].qs[j] = round(x0); - } + quantize_q8_0(src, dst_data[i00/QK8_0]); } } @@ -4377,32 +4901,7 @@ kernel void kernel_cpy_f32_q4_0( for (int64_t i00 = tpitg.x*QK4_0; i00 < args.ne00; i00 += ntg.x*QK4_0) { device const float * src = (device float *)(src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); - float amax = 0.0f; // absolute max - float max = 0.0f; - - for (int j = 0; j < QK4_0; j++) { - const float v = src[j]; - if (amax < fabs(v)) { - amax = fabs(v); - max = v; - } - } - - const float d = max / -8; - const float id = d ? 
1.0f/d : 0.0f; - - dst_data[i00/QK4_0].d = d; - - for (int j = 0; j < QK4_0/2; ++j) { - const float x0 = src[0 + j]*id; - const float x1 = src[QK4_0/2 + j]*id; - - const uint8_t xi0 = MIN(15, (int8_t)(x0 + 8.5f)); - const uint8_t xi1 = MIN(15, (int8_t)(x1 + 8.5f)); - - dst_data[i00/QK4_0].qs[j] = xi0; - dst_data[i00/QK4_0].qs[j] |= xi1 << 4; - } + quantize_q4_0(src, dst_data[i00/QK4_0]); } } @@ -4429,31 +4928,7 @@ kernel void kernel_cpy_f32_q4_1( for (int64_t i00 = tpitg.x*QK4_1; i00 < args.ne00; i00 += ntg.x*QK4_1) { device const float * src = (device float *)(src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); - float min = FLT_MAX; - float max = -FLT_MAX; - - for (int j = 0; j < QK4_1; j++) { - const float v = src[j]; - if (min > v) min = v; - if (max < v) max = v; - } - - const float d = (max - min) / ((1 << 4) - 1); - const float id = d ? 1.0f/d : 0.0f; - - dst_data[i00/QK4_1].d = d; - dst_data[i00/QK4_1].m = min; - - for (int j = 0; j < QK4_1/2; ++j) { - const float x0 = (src[0 + j] - min)*id; - const float x1 = (src[QK4_1/2 + j] - min)*id; - - const uint8_t xi0 = MIN(15, (int8_t)(x0 + 0.5f)); - const uint8_t xi1 = MIN(15, (int8_t)(x1 + 0.5f)); - - dst_data[i00/QK4_1].qs[j] = xi0; - dst_data[i00/QK4_1].qs[j] |= xi1 << 4; - } + quantize_q4_1(src, dst_data[i00/QK4_1]); } } @@ -4480,38 +4955,7 @@ kernel void kernel_cpy_f32_q5_0( for (int64_t i00 = tpitg.x*QK5_0; i00 < args.ne00; i00 += ntg.x*QK5_0) { device const float * src = (device float *)(src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); - float amax = 0.0f; // absolute max - float max = 0.0f; - - for (int j = 0; j < QK5_0; j++) { - const float v = src[j]; - if (amax < fabs(v)) { - amax = fabs(v); - max = v; - } - } - - const float d = max / -16; - const float id = d ? 
1.0f/d : 0.0f; - - dst_data[i00/QK5_0].d = d; - - uint32_t qh = 0; - for (int j = 0; j < QK5_0/2; ++j) { - const float x0 = src[0 + j]*id; - const float x1 = src[QK5_0/2 + j]*id; - - const uint8_t xi0 = MIN(31, (int8_t)(x0 + 16.5f)); - const uint8_t xi1 = MIN(31, (int8_t)(x1 + 16.5f)); - - dst_data[i00/QK5_0].qs[j] = (xi0 & 0xf) | ((xi1 & 0xf) << 4); - qh |= ((xi0 & 0x10u) >> 4) << (j + 0); - qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_0/2); - } - thread const uint8_t * qh8 = (thread const uint8_t *)&qh; - for (int j = 0; j < 4; ++j) { - dst_data[i00/QK5_0].qh[j] = qh8[j]; - } + quantize_q5_0(src, dst_data[i00/QK5_0]); } } @@ -4538,49 +4982,8 @@ kernel void kernel_cpy_f32_q5_1( for (int64_t i00 = tpitg.x*QK5_1; i00 < args.ne00; i00 += ntg.x*QK5_1) { device const float * src = (device float *)(src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); - float max = src[0]; - float min = src[0]; - - for (int j = 1; j < QK5_1; j++) { - const float v = src[j]; - min = v < min ? v : min; - max = v > max ? v : max; - } - - const float d = (max - min) / 31; - const float id = d ? 
1.0f/d : 0.0f; - - dst_data[i00/QK5_1].d = d; - dst_data[i00/QK5_1].m = min; - - uint32_t qh = 0; - for (int j = 0; j < QK5_1/2; ++j) { - const float x0 = (src[0 + j] - min)*id; - const float x1 = (src[QK5_1/2 + j] - min)*id; - - const uint8_t xi0 = (uint8_t)(x0 + 0.5f); - const uint8_t xi1 = (uint8_t)(x1 + 0.5f); - - dst_data[i00/QK5_1].qs[j] = (xi0 & 0xf) | ((xi1 & 0xf) << 4); - qh |= ((xi0 & 0x10u) >> 4) << (j + 0); - qh |= ((xi1 & 0x10u) >> 4) << (j + QK5_1/2); - } - thread const uint8_t * qh8 = (thread const uint8_t *)&qh; - for (int j = 0; j < 4; ++j) { - dst_data[i00/QK5_1].qh[j] = qh8[j]; - } - } -} - -static inline int best_index_int8(int n, constant float * val, float x) { - if (x <= val[0]) return 0; - if (x >= val[n-1]) return n-1; - int ml = 0, mu = n-1; - while (mu-ml > 1) { - int mav = (ml+mu)/2; - if (x < val[mav]) mu = mav; else ml = mav; + quantize_q5_1(src, dst_data[i00/QK5_1]); } - return x - val[mu-1] < val[mu] - x ? mu-1 : mu; } kernel void kernel_cpy_f32_iq4_nl( @@ -4606,40 +5009,7 @@ kernel void kernel_cpy_f32_iq4_nl( for (int64_t i00 = tpitg.x*QK4_NL; i00 < args.ne00; i00 += ntg.x*QK4_NL) { device const float * src = (device float *)(src0 + i03*args.nb03 + i02*args.nb02 + i01*args.nb01 + i00*args.nb00); - float amax = 0.0f; // absolute max - float max = 0.0f; - - for (int j = 0; j < QK4_NL; j++) { - const float v = src[j]; - if (amax < fabs(v)) { - amax = fabs(v); - max = v; - } - } - - const float d = max / kvalues_iq4nl_f[0]; - const float id = d ? 
1.0f/d : 0.0f; - - float sumqx = 0, sumq2 = 0; - for (int j = 0; j < QK4_NL/2; ++j) { - const float x0 = src[0 + j]*id; - const float x1 = src[QK4_NL/2 + j]*id; - - const uint8_t xi0 = best_index_int8(16, kvalues_iq4nl_f, x0); - const uint8_t xi1 = best_index_int8(16, kvalues_iq4nl_f, x1); - - dst_data[i00/QK4_NL].qs[j] = xi0 | (xi1 << 4); - - const float v0 = kvalues_iq4nl_f[xi0]; - const float v1 = kvalues_iq4nl_f[xi1]; - const float w0 = src[0 + j]*src[0 + j]; - const float w1 = src[QK4_NL/2 + j]*src[QK4_NL/2 + j]; - sumqx += w0*v0*src[j] + w1*v1*src[QK4_NL/2 + j]; - sumq2 += w0*v0*v0 + w1*v1*v1; - - } - - dst_data[i00/QK4_NL].d = sumq2 > 0 ? sumqx/sumq2 : d; + quantize_iq4_nl(src, dst_data[i00/QK4_NL]); } } @@ -6320,10 +6690,10 @@ kernel void kernel_mul_mv_iq4_xs_f32( template kernel void kernel_get_rows_q( + constant ggml_metal_kargs_get_rows & args, device const void * src0, device const void * src1, device float * dst, - constant ggml_metal_kargs_get_rows & args, uint3 tgpig[[threadgroup_position_in_grid]], uint tiitg[[thread_index_in_threadgroup]], uint3 tptg [[threads_per_threadgroup]]) { @@ -6343,10 +6713,10 @@ kernel void kernel_get_rows_q( template kernel void kernel_get_rows_f( + constant ggml_metal_kargs_get_rows & args, device const void * src0, device const void * src1, device float * dst, - constant ggml_metal_kargs_get_rows & args, uint3 tgpig[[threadgroup_position_in_grid]], uint tiitg[[thread_index_in_threadgroup]], uint3 tptg [[threads_per_threadgroup]]) { @@ -6364,10 +6734,10 @@ kernel void kernel_get_rows_f( } kernel void kernel_get_rows_i32( + constant ggml_metal_kargs_get_rows & args, device const void * src0, device const void * src1, device int32_t * dst, - constant ggml_metal_kargs_get_rows & args, uint3 tgpig[[threadgroup_position_in_grid]], uint tiitg[[thread_index_in_threadgroup]], uint3 tptg [[threads_per_threadgroup]]) { @@ -6384,6 +6754,67 @@ kernel void kernel_get_rows_i32( } } +template +kernel void kernel_set_rows_q32( + 
constant ggml_metal_kargs_set_rows & args, + device const void * src0, + device const void * src1, + device float * dst, + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiitg[[thread_index_in_threadgroup]], + uint3 tptg [[threads_per_threadgroup]]) { + const int32_t i03 = tgpig.z; + const int32_t i02 = tgpig.y; + + const int32_t i12 = i03%args.ne12; + const int32_t i11 = i02%args.ne11; + + const int32_t i01 = tgpig.x*tptg.y + tiitg/tptg.x; + if (i01 >= args.ne01) { + return; + } + + const int32_t i10 = i01; + const int64_t i1 = ((const device int64_t *) ((const device char *) src1 + i10*args.nb10 + i11*args.nb11 + i12*args.nb12))[0]; + + device block_q * dst_row = ( device block_q *) (( device char *) dst + i1*args.nb1 + i02*args.nb2 + i03*args.nb3); + const device float * src_row = (const device float *) ((const device char *) src0 + i01*args.nb01 + i02*args.nb02 + i03*args.nb03); + + for (int ind = tiitg%tptg.x; ind < args.nk0; ind += tptg.x) { + quantize_func(src_row + 32*ind, dst_row[ind]); + } +} + +template +kernel void kernel_set_rows_f( + constant ggml_metal_kargs_set_rows & args, + device const void * src0, + device const void * src1, + device float * dst, + uint3 tgpig[[threadgroup_position_in_grid]], + uint tiitg[[thread_index_in_threadgroup]], + uint3 tptg [[threads_per_threadgroup]]) { + const int32_t i03 = tgpig.z; + const int32_t i02 = tgpig.y; + + const int32_t i12 = i03%args.ne12; + const int32_t i11 = i02%args.ne11; + + const int32_t i01 = tgpig.x*tptg.y + tiitg/tptg.x; + if (i01 >= args.ne01) { + return; + } + + const int32_t i10 = i01; + const int64_t i1 = ((const device int64_t *) ((const device char *) src1 + i10*args.nb10 + i11*args.nb11 + i12*args.nb12))[0]; + + device T * dst_row = ( device T *) (( device char *) dst + i1*args.nb1 + i02*args.nb2 + i03*args.nb3); + const device float * src_row = (const device float *) ((const device char *) src0 + i01*args.nb01 + i02*args.nb02 + i03*args.nb03); + + for (int ind = tiitg%tptg.x; ind < 
args.nk0; ind += tptg.x) { + dst_row[ind] = (T) src_row[ind]; + } +} #define BLOCK_SIZE_M 64 // 8 simdgroup matrices from matrix A #define BLOCK_SIZE_N 32 // 4 simdgroup matrices from matrix B @@ -6807,6 +7238,27 @@ template [[host_name("kernel_get_rows_iq1_m")]] kernel get_rows_q_t kernel_get template [[host_name("kernel_get_rows_iq4_nl")]] kernel get_rows_q_t kernel_get_rows_q; template [[host_name("kernel_get_rows_iq4_xs")]] kernel get_rows_q_t kernel_get_rows_q; +// +// set rows +// + +typedef decltype(kernel_set_rows_f) set_rows_f_t; + +template [[host_name("kernel_set_rows_f32")]] kernel set_rows_f_t kernel_set_rows_f; +template [[host_name("kernel_set_rows_f16")]] kernel set_rows_f_t kernel_set_rows_f; +#if defined(GGML_METAL_USE_BF16) +template [[host_name("kernel_set_rows_bf16")]] kernel set_rows_f_t kernel_set_rows_f; +#endif + +typedef decltype(kernel_set_rows_q32) set_rows_q32_t; + +template [[host_name("kernel_set_rows_q8_0")]] kernel set_rows_q32_t kernel_set_rows_q32; +template [[host_name("kernel_set_rows_q4_0")]] kernel set_rows_q32_t kernel_set_rows_q32; +template [[host_name("kernel_set_rows_q4_1")]] kernel set_rows_q32_t kernel_set_rows_q32; +template [[host_name("kernel_set_rows_q5_0")]] kernel set_rows_q32_t kernel_set_rows_q32; +template [[host_name("kernel_set_rows_q5_1")]] kernel set_rows_q32_t kernel_set_rows_q32; +template [[host_name("kernel_set_rows_iq4_nl")]] kernel set_rows_q32_t kernel_set_rows_q32; + // // matrix-matrix multiplication // diff --git a/ggml/src/ggml-musa/mudnn.cuh b/ggml/src/ggml-musa/mudnn.cuh index a63be5755c79c..c30128561e810 100644 --- a/ggml/src/ggml-musa/mudnn.cuh +++ b/ggml/src/ggml-musa/mudnn.cuh @@ -1,7 +1,7 @@ #pragma once -#include "../include/ggml.h" -#include "../ggml-cuda/common.cuh" +#include "ggml-cuda/common.cuh" +#include "ggml.h" // Asynchronously copies data from src tensor to dst tensor using the provided context. // Returns a musaError_t indicating success or failure. 
diff --git a/ggml/src/ggml-opencl/CMakeLists.txt b/ggml/src/ggml-opencl/CMakeLists.txt index 0e2a419649cea..ec5d8cf59556b 100644 --- a/ggml/src/ggml-opencl/CMakeLists.txt +++ b/ggml/src/ggml-opencl/CMakeLists.txt @@ -65,6 +65,7 @@ set(GGML_OPENCL_KERNELS gemv_noshuffle_general gemv_noshuffle get_rows + glu group_norm im2col_f32 im2col_f16 @@ -87,6 +88,7 @@ set(GGML_OPENCL_KERNELS rms_norm rope scale + set_rows sigmoid silu softmax_4_f32 @@ -102,6 +104,7 @@ set(GGML_OPENCL_KERNELS tanh pad repeat + mul_mat_f16_f32 ) foreach (K ${GGML_OPENCL_KERNELS}) diff --git a/ggml/src/ggml-opencl/ggml-opencl.cpp b/ggml/src/ggml-opencl/ggml-opencl.cpp index 628e574f0f71e..3388259152b46 100644 --- a/ggml/src/ggml-opencl/ggml-opencl.cpp +++ b/ggml/src/ggml-opencl/ggml-opencl.cpp @@ -231,6 +231,71 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive return { type, major, minor, patch }; } +// Profiling +struct ProfilingInfo { + std::string op_name; + std::string kernel_name; + + cl_kernel kernel; + cl_event evt; + + cl_ulong cmd_queued; + cl_ulong cmd_submit; + cl_ulong cmd_start; + cl_ulong cmd_end; + cl_ulong overhead_start; + cl_ulong overhead_end; + // For the times below, see spec for clGetEventProfilingInfo + // The time kernel spent in cmd queue - SUBMIT - QUEUED + cl_ulong cmd_queued_duration_ns; + // The time kernel spent for submission - START - SUBMIT + cl_ulong cmd_submit_duration_ns; + // Kernel execution time in nanoseconds - END - START + cl_ulong cmd_duration_ns; + // The time for the kernel to complete - COMPLETE - END + cl_ulong cmd_complete_duration_ns; + // Total time to finish the kernel - COMPELTE - QUEUED + cl_ulong cmd_total_duration_ns; + // Global and local work sizes. + size_t global_size[3]; + size_t local_size[3]; + // Op output size. 
+ size_t output_size[4]; +}; + +static void populateProfilingInfo( + ProfilingInfo& info, cl_event evt, cl_kernel kernel, cl_uint work_dim, + size_t global_size[3], size_t local_size[3], + const ggml_tensor * tensor) { + info.op_name = tensor->name; + info.kernel = kernel; + info.evt = evt; + + // 0 means not specified, e.g., 2D workgroup, or NULL for driver to choose + info.local_size[0] = 0; + info.local_size[1] = 0; + info.local_size[2] = 0; + + info.global_size[0] = 0; + info.global_size[1] = 0; + info.global_size[2] = 0; + + if (local_size) { + for (cl_uint i = 0; i < work_dim; ++i) { + info.local_size[i] = local_size[i]; + } + } + + for (cl_uint i = 0; i < work_dim; ++i) { + info.global_size[i] = global_size[i]; + } + + info.output_size[0] = tensor->ne[0]; + info.output_size[1] = tensor->ne[1]; + info.output_size[2] = tensor->ne[2]; + info.output_size[3] = tensor->ne[3]; +} + struct ggml_backend_opencl_context; // backend device context @@ -254,6 +319,8 @@ struct ggml_backend_opencl_device_context { // backend context struct ggml_backend_opencl_context { + int ref_count; + cl_device_id device; std::string device_name; @@ -284,6 +351,8 @@ struct ggml_backend_opencl_context { cl_program program_gemv_noshuffle_general; cl_program program_gemv_noshuffle; cl_program program_get_rows; + cl_program program_set_rows; + cl_program program_glu; cl_program program_im2col_f16; cl_program program_im2col_f32; cl_program program_mul_mat_Ab_Bi_8x4; @@ -299,6 +368,7 @@ struct ggml_backend_opencl_context { cl_program program_mul_mv_f16_f32; cl_program program_mul_mv_f32_f32; cl_program program_mul; + cl_program program_mul_mat_f16_f32_tiled; cl_program program_div; cl_program program_sub; cl_program program_norm; @@ -330,10 +400,13 @@ struct ggml_backend_opencl_context { cl_kernel kernel_scale; cl_kernel kernel_silu, kernel_silu_4; cl_kernel kernel_gelu, kernel_gelu_4; + cl_kernel kernel_gelu_erf, kernel_gelu_erf_4; cl_kernel kernel_gelu_quick, kernel_gelu_quick_4; cl_kernel 
kernel_relu; cl_kernel kernel_sigmoid_f32, kernel_sigmoid_f16; cl_kernel kernel_clamp; + cl_kernel kernel_geglu, kernel_reglu, kernel_swiglu, kernel_geglu_erf, kernel_geglu_quick, + kernel_geglu_f16, kernel_reglu_f16, kernel_swiglu_f16, kernel_geglu_erf_f16, kernel_geglu_quick_f16; cl_kernel kernel_norm; cl_kernel kernel_rms_norm; cl_kernel kernel_group_norm; @@ -341,6 +414,7 @@ struct ggml_backend_opencl_context { cl_kernel kernel_soft_max, kernel_soft_max_4; cl_kernel kernel_soft_max_f16, kernel_soft_max_4_f16; cl_kernel kernel_get_rows_f32, kernel_get_rows_f16, kernel_get_rows_q4_0; + cl_kernel kernel_set_rows_f32, kernel_set_rows_f16; cl_kernel kernel_rope_norm_f32, kernel_rope_norm_f16, kernel_rope_neox_f32, kernel_rope_neox_f16; cl_kernel kernel_rope_multi_f32, kernel_rope_multi_f16, kernel_rope_vision_f32, kernel_rope_vision_f16; cl_kernel kernel_cpy_f16_f16, kernel_cpy_f16_f32, kernel_cpy_f32_f16, kernel_cpy_f32_f32; @@ -349,6 +423,7 @@ struct ggml_backend_opencl_context { cl_kernel kernel_mul_mat_f16_f32_1row; cl_kernel kernel_mul_mat_f16_f32; cl_kernel kernel_mul_mat_f16_f32_l4; + cl_kernel kernel_mul_mat_f16_f32_tiled; cl_kernel kernel_mul_mat_q4_0_f32, kernel_mul_mat_q4_0_f32_v; cl_kernel kernel_convert_block_q4_0, kernel_restore_block_q4_0; cl_kernel kernel_mul_mat_q4_0_f32_8x_flat; @@ -369,6 +444,118 @@ struct ggml_backend_opencl_context { cl_kernel kernel_timestep_embedding; cl_kernel kernel_mul_mv_id_q4_0_f32_8x_flat; + std::vector profiling_info; + + void write_profiling_info() { + FILE * fperf = fopen("cl_profiling.csv", "w"); + if (!fperf) { + GGML_LOG_ERROR("Failed to open cl_profiling.csv\n"); + return; + } + + // Populate profiling info + for (ProfilingInfo & info : profiling_info) { + cl_ulong cmd_queued; + cl_ulong cmd_submit; + cl_ulong cmd_start; + cl_ulong cmd_end; + cl_ulong cmd_complete; + + CL_CHECK(clWaitForEvents(1, &info.evt)); + CL_CHECK(clGetEventProfilingInfo( + info.evt, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), 
&cmd_queued, NULL)); + CL_CHECK(clGetEventProfilingInfo( + info.evt, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &cmd_submit, NULL)); + CL_CHECK(clGetEventProfilingInfo( + info.evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &cmd_start, NULL)); + CL_CHECK(clGetEventProfilingInfo( + info.evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &cmd_end, NULL)); + CL_CHECK(clGetEventProfilingInfo( + info.evt, CL_PROFILING_COMMAND_COMPLETE, sizeof(cl_ulong), &cmd_complete, NULL)); + CL_CHECK(clReleaseEvent(info.evt)); + + char kernel_name[512]; + CL_CHECK(clGetKernelInfo(info.kernel, CL_KERNEL_FUNCTION_NAME, + sizeof(kernel_name), kernel_name, NULL)); + info.kernel_name = kernel_name; + + info.cmd_queued = cmd_queued; + info.cmd_submit = cmd_submit; + info.cmd_start = cmd_start; + info.cmd_end = cmd_end; + + info.cmd_queued_duration_ns = cmd_submit - cmd_queued; + info.cmd_submit_duration_ns = cmd_start - cmd_submit; + info.cmd_duration_ns = cmd_end - cmd_start; + info.cmd_complete_duration_ns = cmd_complete - cmd_end; + info.cmd_total_duration_ns = cmd_complete - cmd_queued; + } + + // Dump a csv + float total_kernel_time = 0; + fprintf(fperf, "op name, kernel name, queued duration (ms), submit duration(ms), exec duration (ms), complete duration (ms), total duration (ms), global size, local size, output size\n"); + for (const ProfilingInfo & info : profiling_info) { + total_kernel_time += info.cmd_duration_ns/1.e6f; + fprintf(fperf, "%s,%s,%f,%f,%f,%f,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n", + info.op_name.c_str(), info.kernel_name.c_str(), + info.cmd_queued_duration_ns/1.e6f, + info.cmd_submit_duration_ns/1.e6f, + info.cmd_duration_ns/1.e6f, + info.cmd_complete_duration_ns/1.e6f, + info.cmd_total_duration_ns/1.e6f, + info.global_size[0], info.global_size[1], info.global_size[2], + info.local_size[0], info.local_size[1], info.local_size[2], + info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]); + } + fclose(fperf); + + 
GGML_LOG_INFO("ggml_opencl: total kernel time: %f\n", total_kernel_time); + + // Dump a simple chrome trace + FILE* ftrace = fopen("cl_trace.json", "w"); + if (!ftrace) { + GGML_LOG_ERROR("Failed to open cl_trace.json\n"); + return; + } + + fprintf(ftrace, "[\n"); + for (const ProfilingInfo & info : profiling_info) { + fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n", + info.kernel_name.c_str(), info.cmd_queued/1000); + fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n", + info.kernel_name.c_str(), info.cmd_submit/1000); + + fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n", + info.kernel_name.c_str(), info.cmd_start/1000); + fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n", + info.kernel_name.c_str(), info.cmd_end/1000); + } + fclose(ftrace); + } + + size_t get_kernel_workgroup_size(cl_kernel kernel) const { + size_t workgroup_size = 0; + size_t ret_size = 0; + CL_CHECK( + clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(size_t), &workgroup_size, &ret_size)); + GGML_ASSERT(sizeof(size_t) == ret_size); + return workgroup_size; + } + + void enqueue_ndrange_kernel(cl_kernel kernel, cl_uint work_dim, size_t *global_work_size, size_t *local_work_size, const ggml_tensor * tensor) { +#ifdef GGML_OPENCL_PROFILING + cl_event evt; + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, &evt)); + + profiling_info.emplace_back(); + populateProfilingInfo(profiling_info.back(), evt, kernel, work_dim, global_work_size, local_work_size, tensor); +#else + GGML_UNUSED(tensor); + CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, global_work_size, local_work_size, 0, NULL, NULL)); +#endif + } + 
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS // Transpose kernels cl_program program_transpose; @@ -395,46 +582,19 @@ struct ggml_backend_opencl_context { cl_kernel CL_mul_mat_vec_q4_0_f32_1d_4x_flat_11008_1_4096; cl_kernel CL_mul_mat_vec_q4_0_f32_1d_4x_flat_32000_1_4096; #endif // GGML_OPENCL_USE_ADRENO_KERNELS -}; - -// All registered devices with a default device in the front. -static std::vector g_ggml_backend_opencl_devices; -// Profiling + void free() { + ref_count--; + if (ref_count == 0) { #ifdef GGML_OPENCL_PROFILING -struct ProfilingInfo { - std::string op_name; - std::string kernel_name; - - cl_kernel kernel; - cl_event evt; - - cl_ulong cmd_queued; - cl_ulong cmd_submit; - cl_ulong cmd_start; - cl_ulong cmd_end; - cl_ulong overhead_start; - cl_ulong overhead_end; - // For the times below, see spec for clGetEventProfilingInfo - // The time kernel spent in cmd queue - SUBMIT - QUEUED - cl_ulong cmd_queued_duration_ns; - // The time kernel spent for submission - START - SUBMIT - cl_ulong cmd_submit_duration_ns; - // Kernel execution time in nanoseconds - END - START - cl_ulong cmd_duration_ns; - // The time for the kernel to complete - COMPLETE - END - cl_ulong cmd_complete_duration_ns; - // Total time to finish the kernel - COMPELTE - QUEUED - cl_ulong cmd_total_duration_ns; - // Global and local work sizes. - size_t global_size[3]; - size_t local_size[3]; - // Op output size. - size_t output_size[4]; + write_profiling_info(); +#endif + } + } }; -std::vector g_profiling_info; -#endif +// All registered devices with a default device in the front. 
+static std::vector g_ggml_backend_opencl_devices; inline std::string read_file(const std::string &path) { std::ifstream ifs(path); @@ -591,11 +751,38 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve CL_CHECK((backend_ctx->kernel_gelu = clCreateKernel(backend_ctx->program_gelu, "kernel_gelu", &err), err)); CL_CHECK((backend_ctx->kernel_gelu_4 = clCreateKernel(backend_ctx->program_gelu, "kernel_gelu_4", &err), err)); + CL_CHECK((backend_ctx->kernel_gelu_erf = clCreateKernel(backend_ctx->program_gelu, "kernel_gelu_erf", &err), err)); + CL_CHECK((backend_ctx->kernel_gelu_erf_4 = clCreateKernel(backend_ctx->program_gelu, "kernel_gelu_erf_4", &err), err)); CL_CHECK((backend_ctx->kernel_gelu_quick = clCreateKernel(backend_ctx->program_gelu, "kernel_gelu_quick", &err), err)); CL_CHECK((backend_ctx->kernel_gelu_quick_4 = clCreateKernel(backend_ctx->program_gelu, "kernel_gelu_quick_4", &err), err)); GGML_LOG_CONT("."); } + // glu + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "glu.cl.h" + }; +#else + const std::string kernel_src = read_file("glu.cl"); +#endif + backend_ctx->program_glu = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts); + + CL_CHECK((backend_ctx->kernel_geglu = clCreateKernel(backend_ctx->program_glu, "kernel_geglu", &err), err)); + CL_CHECK((backend_ctx->kernel_reglu = clCreateKernel(backend_ctx->program_glu, "kernel_reglu", &err), err)); + CL_CHECK((backend_ctx->kernel_swiglu = clCreateKernel(backend_ctx->program_glu, "kernel_swiglu", &err), err)); + CL_CHECK((backend_ctx->kernel_geglu_erf = clCreateKernel(backend_ctx->program_glu, "kernel_geglu_erf", &err), err)); + CL_CHECK((backend_ctx->kernel_geglu_quick = clCreateKernel(backend_ctx->program_glu, "kernel_geglu_quick", &err), err)); + CL_CHECK((backend_ctx->kernel_geglu_f16 = clCreateKernel(backend_ctx->program_glu, "kernel_geglu_f16", &err), err)); + 
CL_CHECK((backend_ctx->kernel_reglu_f16 = clCreateKernel(backend_ctx->program_glu, "kernel_reglu_f16", &err), err)); + CL_CHECK((backend_ctx->kernel_swiglu_f16 = clCreateKernel(backend_ctx->program_glu, "kernel_swiglu_f16", &err), err)); + CL_CHECK((backend_ctx->kernel_geglu_erf_f16 = clCreateKernel(backend_ctx->program_glu, "kernel_geglu_erf_f16", &err), err)); + CL_CHECK((backend_ctx->kernel_geglu_quick_f16 = clCreateKernel(backend_ctx->program_glu, "kernel_geglu_quick_f16", &err), err)); + GGML_LOG_CONT("."); + } + // get_rows { #ifdef GGML_OPENCL_EMBED_KERNELS @@ -830,6 +1017,22 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve GGML_LOG_CONT("."); } + // mul_mat_f16_f32_tiled + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "mul_mat_f16_f32.cl.h" + }; +#else + const std::string kernel_src = read_file("mul_mat_f16_f32.cl"); +#endif + backend_ctx->program_mul_mat_f16_f32_tiled = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts); + + CL_CHECK((backend_ctx->kernel_mul_mat_f16_f32_tiled = clCreateKernel(backend_ctx->program_mul_mat_f16_f32_tiled, "mul_mat_f16_f32", &err), err)); + GGML_LOG_CONT("."); + } + // mul { #ifdef GGML_OPENCL_EMBED_KERNELS @@ -1258,6 +1461,23 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve } } + // set_rows + { +#ifdef GGML_OPENCL_EMBED_KERNELS + const std::string kernel_src { + #include "set_rows.cl.h" + }; +#else + const std::string kernel_src = read_file("set_rows.cl"); +#endif + backend_ctx->program_set_rows = + build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts); + + CL_CHECK((backend_ctx->kernel_set_rows_f32 = clCreateKernel(backend_ctx->program_set_rows, "kernel_set_rows_f32", &err), err)); + CL_CHECK((backend_ctx->kernel_set_rows_f16 = clCreateKernel(backend_ctx->program_set_rows, "kernel_set_rows_f16", &err), err)); + 
GGML_LOG_CONT("."); + } + // mul_mv_id_q4_0_f32_8x_flat { #ifdef GGML_OPENCL_EMBED_KERNELS @@ -1669,6 +1889,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) { backend_ctx->device = dev_ctx->device; backend_ctx->gpu_family = GPU_FAMILY::UNKNOWN; + // ref_count get increased in ggml_backend_opencl_device_init + // This function is also used to retrieve backend context, so we don't want + // to increase ref_count for each call. We only want to increase ref_count + // when the associated device is initialized + backend_ctx->ref_count = 0; + if (strstr(dev_ctx->device_name.c_str(), "Adreno") || strstr(dev_ctx->device_name.c_str(), "Qualcomm") || strstr(dev_ctx->device_version.c_str(), "Adreno")) { @@ -1841,93 +2067,22 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) { return dev_ctx->backend_ctx; } -static void ggml_cl2_free(void) { -#ifdef GGML_OPENCL_PROFILING - FILE * fperf = fopen("cl_profiling.csv", "w"); - if (!fperf) { - GGML_LOG_ERROR("Failed to open cl_profiling.csv\n"); - return; - } +static void ggml_cl2_free(ggml_backend_t backend) { + ggml_backend_opencl_context * ctx = (ggml_backend_opencl_context *) backend->context; + ctx->free(); - // Populate profiling info - for (ProfilingInfo & info : g_profiling_info) { - cl_ulong cmd_queued; - cl_ulong cmd_submit; - cl_ulong cmd_start; - cl_ulong cmd_end; - cl_ulong cmd_complete; - - CL_CHECK(clWaitForEvents(1, &info.evt)); - CL_CHECK(clGetEventProfilingInfo( - info.evt, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &cmd_queued, NULL)); - CL_CHECK(clGetEventProfilingInfo( - info.evt, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &cmd_submit, NULL)); - CL_CHECK(clGetEventProfilingInfo( - info.evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &cmd_start, NULL)); - CL_CHECK(clGetEventProfilingInfo( - info.evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &cmd_end, NULL)); - CL_CHECK(clGetEventProfilingInfo( - info.evt, CL_PROFILING_COMMAND_COMPLETE, 
sizeof(cl_ulong), &cmd_complete, NULL)); - CL_CHECK(clReleaseEvent(info.evt)); - - char kernel_name[512]; - CL_CHECK(clGetKernelInfo(info.kernel, CL_KERNEL_FUNCTION_NAME, - sizeof(kernel_name), kernel_name, NULL)); - info.kernel_name = kernel_name; - - info.cmd_queued = cmd_queued; - info.cmd_submit = cmd_submit; - info.cmd_start = cmd_start; - info.cmd_end = cmd_end; - - info.cmd_queued_duration_ns = cmd_submit - cmd_queued; - info.cmd_submit_duration_ns = cmd_start - cmd_submit; - info.cmd_duration_ns = cmd_end - cmd_start; - info.cmd_complete_duration_ns = cmd_complete - cmd_end; - info.cmd_total_duration_ns = cmd_complete - cmd_queued; - } - - // Dump a csv - float total_kernel_time = 0; - fprintf(fperf, "op name, kernel name, queued duration (ms), submit duration(ms), exec duration (ms), complete duration (ms), total duration (ms), global size, local size, output size\n"); - for (const ProfilingInfo & info : g_profiling_info) { - total_kernel_time += info.cmd_duration_ns/1.e6f; - fprintf(fperf, "%s,%s,%f,%f,%f,%f,%f,%zux%zux%zu,%zux%zux%zu,%zux%zux%zux%zu\n", - info.op_name.c_str(), info.kernel_name.c_str(), - info.cmd_queued_duration_ns/1.e6f, - info.cmd_submit_duration_ns/1.e6f, - info.cmd_duration_ns/1.e6f, - info.cmd_complete_duration_ns/1.e6f, - info.cmd_total_duration_ns/1.e6f, - info.global_size[0], info.global_size[1], info.global_size[2], - info.local_size[0], info.local_size[1], info.local_size[2], - info.output_size[0], info.output_size[1], info.output_size[2], info.output_size[3]); - } - fclose(fperf); - - GGML_LOG_INFO("ggml_opencl: total kernel time: %f\n", total_kernel_time); - - // Dump a simple chrome trace - FILE* ftrace = fopen("cl_trace.json", "w"); - if (!ftrace) { - GGML_LOG_ERROR("Failed to open cl_trace.json\n"); - return; + // The CL context is shared by all backends, release it if all backends have been released + bool should_release_opencl = true; + for (auto device : g_ggml_backend_opencl_devices) { + 
ggml_backend_opencl_device_context * ctx_dev = (ggml_backend_opencl_device_context *) device.context; + if (ctx_dev->backend_ctx->ref_count > 0) { + should_release_opencl = false; + } } - fprintf(ftrace, "[\n"); - for (const ProfilingInfo & info : g_profiling_info) { - fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n", - info.kernel_name.c_str(), info.cmd_queued/1000); - fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Host\"},\n", - info.kernel_name.c_str(), info.cmd_submit/1000); - - fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"B\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n", - info.kernel_name.c_str(), info.cmd_start/1000); - fprintf(ftrace, "{\"name\": \"%s\", \"cat\": \"OpenCL\", \"ph\": \"E\", \"ts\": %lu, \"pid\": \"\", \"tid\": \"Device\"},\n", - info.kernel_name.c_str(), info.cmd_end/1000); + if (should_release_opencl) { + CL_CHECK(clReleaseContext(ctx->context)); } - fclose(ftrace); -#endif } //------------------------------------------------------------------------------ @@ -2011,9 +2166,7 @@ static const char * ggml_backend_opencl_name(ggml_backend_t backend) { } static void ggml_backend_opencl_free(ggml_backend_t backend) { - ggml_cl2_free(); - - GGML_UNUSED(backend); + ggml_cl2_free(backend); } static void ggml_backend_opencl_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { @@ -2088,7 +2241,7 @@ static ggml_status ggml_backend_opencl_graph_compute(ggml_backend_t backend, ggm // dependencies. 
sync_with_other_backends(backend); - if (node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) { + if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) { continue; } @@ -2123,6 +2276,22 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te default: return false; } + case GGML_OP_SET_ROWS: + { + // TODO: add support + // ref: https://github.com/ggml-org/llama.cpp/pull/14274 +#pragma message("TODO: implement BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)") + if (op->src[0]->type != GGML_TYPE_F32) { + return false; + } + switch (op->type) { + case GGML_TYPE_F16: + case GGML_TYPE_F32: + return true; + default: + return false; + } + } case GGML_OP_CPY: case GGML_OP_DUP: case GGML_OP_CONT: @@ -2157,6 +2326,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te case GGML_UNARY_OP_GELU: case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_RELU: + case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_GELU_QUICK: return ggml_is_contiguous(op->src[0]) && op->src[0]->type == GGML_TYPE_F32; case GGML_UNARY_OP_SIGMOID: @@ -2167,6 +2337,17 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te default: return false; } + case GGML_OP_GLU: + switch (ggml_get_glu_op(op)) { + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + return ggml_is_contiguous_1(op->src[0]) && (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16); + default: + return false; + } case GGML_OP_CLAMP: return op->src[0]->type == GGML_TYPE_F32; case GGML_OP_SOFT_MAX: @@ -2899,6 +3080,8 @@ static void ggml_backend_opencl_device_get_props(ggml_backend_dev_t dev, struct static 
ggml_backend_t ggml_backend_opencl_device_init(ggml_backend_dev_t dev, const char * params) { ggml_backend_opencl_context * backend_ctx = ggml_cl2_init(dev); + // Getting a new reference to the backend, increase ref_count + backend_ctx->ref_count++; ggml_backend_t backend = new ggml_backend { /* .guid = */ ggml_backend_opencl_guid(), @@ -3089,7 +3272,7 @@ static void dump_tensor(ggml_backend_t backend, const struct ggml_tensor * tenso // Open file and dump. char fname[512]; - sprintf(fname, "./tensor-dumps/%s.txt", tensor->name); + snprintf(fname, sizeof(fname), "./tensor-dumps/%s.txt", tensor->name); FILE * f = fopen(fname, "w"); if (!f) { printf("Failed to open %s\n", fname); @@ -3159,31 +3342,6 @@ static void dump_tensor(ggml_backend_t backend, const struct ggml_tensor * tenso #define dump_tensor(tensor) #endif -//------------------------------------------------------------------------------ -// Profiling utility -//------------------------------------------------------------------------------ -#ifdef GGML_OPENCL_PROFILING -static void populateProfilingInfo( - ProfilingInfo& info, cl_event evt, cl_kernel kernel, - size_t global_size[3], size_t local_size[3], - const ggml_tensor * tensor) { - info.op_name = tensor->name; - info.kernel = kernel; - info.evt = evt; - - info.local_size[0] = local_size[0]; - info.local_size[1] = local_size[1]; - info.local_size[2] = local_size[2]; - info.global_size[0] = global_size[0]; - info.global_size[1] = global_size[1]; - info.global_size[2] = global_size[2]; - info.output_size[0] = tensor->ne[0]; - info.output_size[1] = tensor->ne[1]; - info.output_size[2] = tensor->ne[2]; - info.output_size[3] = tensor->ne[3]; -} -#endif - //------------------------------------------------------------------------------ // Ops //------------------------------------------------------------------------------ @@ -3227,7 +3385,6 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c const cl_ulong nb2 = dst ? 
dst->nb[2] : 0; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -3271,15 +3428,112 @@ static void ggml_cl_get_rows(ggml_backend_t backend, const ggml_tensor * src0, c size_t global_work_size[] = {(size_t)ne10, (size_t)ne11, 1}; size_t local_work_size[] = {1, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); +} - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif +static void ggml_cl_set_rows(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0); + GGML_ASSERT(src0->extra); + GGML_ASSERT(src1); + GGML_ASSERT(src1->extra); + GGML_ASSERT(dst); + GGML_ASSERT(dst->extra); + + // ne0 = ne00 + // ne2 = ne02 + // ne3 = ne03 + + const int ne01 = src0->ne[1]; + const int ne02 = src0->ne[2]; + const int ne03 = src0->ne[3]; + + const cl_ulong nb01 = src0->nb[1]; + const cl_ulong nb02 = src0->nb[2]; + const cl_ulong nb03 = src0->nb[3]; + + const int ne11 = src1->ne[1]; + const int ne12 = src1->ne[2]; + + const cl_ulong nb10 = src1->nb[0]; + const cl_ulong nb11 = src1->nb[1]; + const cl_ulong nb12 = src1->nb[2]; + + const int ne0 = dst->ne[0]; + + const cl_ulong nb1 = dst->nb[1]; + const cl_ulong nb2 = dst->nb[2]; + const cl_ulong nb3 = dst->nb[3]; + + const int nblk0 = ne0/ggml_blck_size(dst->type); + + ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context 
*)backend->context; + + ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; + ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; + ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; + + cl_ulong offset0 = extra0->offset + src0->view_offs; + cl_ulong offset1 = extra1->offset + src1->view_offs; + cl_ulong offsetd = extrad->offset + dst->view_offs; + + cl_kernel kernel; + + switch (dst->type) { + case GGML_TYPE_F32: + kernel = backend_ctx->kernel_set_rows_f32; + break; + case GGML_TYPE_F16: + kernel = backend_ctx->kernel_set_rows_f16; + break; + default: + GGML_ABORT("not implemented"); + } + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0->data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &offset0)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra1->data_device)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offset1)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb01)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &nb02)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_ulong), &nb03)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne11)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne12)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_ulong), &nb10)); + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_ulong), &nb11)); + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_ulong), &nb12)); + CL_CHECK(clSetKernelArg(kernel, 15, sizeof(int), &nblk0)); + CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_ulong), &nb1)); + CL_CHECK(clSetKernelArg(kernel, 17, sizeof(cl_ulong), &nb2)); + CL_CHECK(clSetKernelArg(kernel, 18, sizeof(cl_ulong), &nb3)); + + int nth0 = 64; + if (backend_ctx->gpu_family == INTEL) { + nth0 = 32; + } else if 
(backend_ctx->gpu_family == ADRENO) { + nth0 = 64; + } + + int max_workgroup_size = backend_ctx->get_kernel_workgroup_size(kernel); + while (nth0 < nblk0 && nth0 < max_workgroup_size) { + nth0 *= 2; + } + + int rows_per_workgroup = 1; + if (nth0 > nblk0) { + rows_per_workgroup = nth0 / nblk0; + nth0 = nblk0; + } + + size_t global_work_size[] = { + (size_t)(ne01 + rows_per_workgroup - 1)/rows_per_workgroup*nth0, + (size_t)ne02*rows_per_workgroup, + (size_t)ne03}; + size_t local_work_size[] = {(size_t)nth0, (size_t)rows_per_workgroup, 1}; + + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -3321,7 +3575,6 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const const cl_ulong nb3 = dst ? dst->nb[3] : 0; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -3396,29 +3649,13 @@ static void ggml_cl_add(ggml_backend_t backend, const ggml_tensor * src0, const local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. 
} -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } else { unsigned int nth = MIN(64, ne0); size_t global_work_size[] = {ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } } @@ -3461,7 +3698,6 @@ static void ggml_cl_mul(ggml_backend_t backend, const ggml_tensor * src0, const const cl_ulong nb3 = dst ? dst->nb[3] : 0; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -3536,29 +3772,13 @@ static void ggml_cl_mul(ggml_backend_t backend, const ggml_tensor * src0, const local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. 
} -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } else { unsigned int nth = MIN(64, ne0); size_t global_work_size[] = {ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } } @@ -3598,7 +3818,6 @@ static void ggml_cl_div(ggml_backend_t backend, const ggml_tensor * src0, const const cl_ulong nb3 = dst->nb[3]; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -3661,29 +3880,13 @@ static void ggml_cl_div(ggml_backend_t backend, const ggml_tensor * src0, const size_t global_work_size[] = {(size_t)n, 1, 1}; size_t local_work_size[] = {64, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - 
populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } else { unsigned int nth = MIN(64, ne0); size_t global_work_size[] = {ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } } @@ -3723,7 +3926,6 @@ static void ggml_cl_sub(ggml_backend_t backend, const ggml_tensor * src0, const const cl_ulong nb3 = dst->nb[3]; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -3786,29 +3988,13 @@ static void ggml_cl_sub(ggml_backend_t backend, const ggml_tensor * src0, const size_t global_work_size[] = {(size_t)n, 1, 1}; size_t local_work_size[] = {64, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); 
-#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } else { unsigned int nth = MIN(64, ne0); size_t global_work_size[] = {ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } } @@ -3821,7 +4007,6 @@ static void ggml_cl_gelu(ggml_backend_t backend, const ggml_tensor * src0, const UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -3848,15 +4033,45 @@ static void ggml_cl_gelu(ggml_backend_t backend, const ggml_tensor * src0, const size_t global_work_size[] = {(size_t)n, 1, 1}; size_t local_work_size[] = {64, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt); + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); +} - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL); -#endif +static void ggml_cl_gelu_erf(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0); + 
GGML_ASSERT(src0->extra); + GGML_ASSERT(dst); + GGML_ASSERT(dst->extra); + + UNUSED(src1); + + ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; + + ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; + ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; + + cl_ulong offset0 = extra0->offset + src0->view_offs; + cl_ulong offsetd = extrad->offset + dst->view_offs; + + cl_kernel kernel; + + int n = ggml_nelements(dst); + + if (n % 4 == 0) { + kernel = backend_ctx->kernel_gelu_erf_4; + n /= 4; + } else { + kernel = backend_ctx->kernel_gelu_erf; + } + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0->data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &offset0)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offsetd)); + + size_t global_work_size[] = {(size_t)n, 1, 1}; + size_t local_work_size[] = {64, 1, 1}; + + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_gelu_quick(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -3868,7 +4083,6 @@ static void ggml_cl_gelu_quick(ggml_backend_t backend, const ggml_tensor * src0, UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -3895,15 +4109,7 @@ static void ggml_cl_gelu_quick(ggml_backend_t backend, const ggml_tensor * src0, size_t global_work_size[] = {(size_t)n, 1, 1}; size_t local_work_size[] = {64, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt); - - g_profiling_info.emplace_back(); - 
populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_silu(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -3915,7 +4121,6 @@ static void ggml_cl_silu(ggml_backend_t backend, const ggml_tensor * src0, const UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -3947,15 +4152,7 @@ static void ggml_cl_silu(ggml_backend_t backend, const ggml_tensor * src0, const local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. } -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_relu(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -3967,7 +4164,6 @@ static void ggml_cl_relu(ggml_backend_t backend, const ggml_tensor * src0, const UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl 
* extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -3992,15 +4188,7 @@ static void ggml_cl_relu(ggml_backend_t backend, const ggml_tensor * src0, const local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. } -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_sigmoid(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4012,7 +4200,6 @@ static void ggml_cl_sigmoid(ggml_backend_t backend, const ggml_tensor * src0, co UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -4044,15 +4231,7 @@ static void ggml_cl_sigmoid(ggml_backend_t backend, const ggml_tensor * src0, co local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. 
} -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_clamp(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4064,7 +4243,6 @@ static void ggml_cl_clamp(ggml_backend_t backend, const ggml_tensor * src0, cons UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -4096,15 +4274,7 @@ static void ggml_cl_clamp(ggml_backend_t backend, const ggml_tensor * src0, cons local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. 
} -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_norm(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4116,7 +4286,6 @@ static void ggml_cl_norm(ggml_backend_t backend, const ggml_tensor * src0, const UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -4157,15 +4326,7 @@ static void ggml_cl_norm(ggml_backend_t backend, const ggml_tensor * src0, const size_t global_work_size[] = {(size_t)ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {(size_t)nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_rms_norm(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4177,7 +4338,6 @@ static void ggml_cl_rms_norm(ggml_backend_t backend, const 
ggml_tensor * src0, c UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; //ggml_backend_opencl_device_context * dev_ctx = // (ggml_backend_opencl_device_context *)backend->device->context; @@ -4241,15 +4401,7 @@ static void ggml_cl_rms_norm(ggml_backend_t backend, const ggml_tensor * src0, c // This is local memory - the size depends on subgroup size. CL_CHECK(clSetKernelArg(kernel, 12, sizeof(float)*nth/sgs, NULL)); -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_group_norm(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4261,7 +4413,6 @@ static void ggml_cl_group_norm(ggml_backend_t backend, const ggml_tensor * src0, UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -4300,15 +4451,7 @@ static void ggml_cl_group_norm(ggml_backend_t backend, const ggml_tensor * src0, size_t global_work_size[] = {(size_t)n_groups*sgs, 1, 1}; size_t local_work_size[] = {(size_t)sgs, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - 
populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_tanh(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4320,7 +4463,6 @@ static void ggml_cl_tanh(ggml_backend_t backend, const ggml_tensor * src0, const UNUSED(src1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -4397,16 +4539,7 @@ static void ggml_cl_tanh(ggml_backend_t backend, const ggml_tensor * src0, const } if (global_work_size[0] == 0 || global_work_size[1] == 0 || global_work_size[2] == 0) return; - -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr ? 
local_work_size : (size_t[3]){0,0,0}, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_repeat(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1_shape_def, ggml_tensor * dst) { @@ -4419,7 +4552,6 @@ static void ggml_cl_repeat(ggml_backend_t backend, const ggml_tensor * src0, con UNUSED(src1_shape_def); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; if (backend_ctx->kernel_repeat == nullptr) { GGML_LOG_WARN("%s: repeat kernel not available, skipping OpenCL execution.\n", __func__); @@ -4467,15 +4599,7 @@ static void ggml_cl_repeat(ggml_backend_t backend, const ggml_tensor * src0, con size_t global_work_size[] = { gws0, gws1, gws2 }; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, NULL, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, (size_t[3]){0,0,0}, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, NULL, dst); } static void ggml_cl_pad(ggml_backend_t backend, const ggml_tensor * src0, ggml_tensor * dst) { @@ -4488,7 +4612,6 @@ static void ggml_cl_pad(ggml_backend_t backend, const ggml_tensor * src0, ggml_t GGML_ASSERT(src0->ne[3] == 1 && dst->ne[3] == 1); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; if (backend_ctx->kernel_pad == nullptr) { GGML_LOG_WARN("%s: pad kernel not available, skipping OpenCL execution.\n", __func__); @@ -4533,15 +4656,7 @@ static void 
ggml_cl_pad(ggml_backend_t backend, const ggml_tensor * src0, ggml_t local_work_size_ptr = nullptr; } -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr ? local_work_size : (size_t[3]){0,0,0}, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, ggml_tensor * dst) { @@ -4553,9 +4668,9 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg GGML_ASSERT(dst->type == GGML_TYPE_F32); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; - const ggml_scale_mode mode = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0); + const int mode_flags = (ggml_scale_mode) ggml_get_op_params_i32(dst, 0); + const ggml_scale_mode mode = (ggml_scale_mode) (mode_flags & 0xFF); cl_kernel kernel = nullptr; if (mode == GGML_SCALE_MODE_NEAREST) { @@ -4586,18 +4701,22 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg const cl_ulong nb02 = src0->nb[2]; const cl_ulong nb03 = src0->nb[3]; - const int ne00_src = src0->ne[0]; - const int ne01_src = src0->ne[1]; + const int ne00 = src0->ne[0]; + const int ne01 = src0->ne[1]; + const int ne02 = src0->ne[2]; + const int ne03 = src0->ne[3]; + + const int ne0 = dst->ne[0]; + const int ne1 = dst->ne[1]; + const int ne2 = dst->ne[2]; + const int ne3 = dst->ne[3]; - const int ne10_dst = dst->ne[0]; - const int ne11_dst = dst->ne[1]; - const int ne12_dst = dst->ne[2]; - const int ne13_dst = dst->ne[3]; + float sf0 = 
(float)ne0 / ne00; + float sf1 = (float)ne1 / ne01; + float sf2 = (float)ne2 / ne02; + float sf3 = (float)ne3 / ne03; - const float sf0 = (float)dst->ne[0] / src0->ne[0]; - const float sf1 = (float)dst->ne[1] / src0->ne[1]; - const float sf2 = (float)dst->ne[2] / src0->ne[2]; - const float sf3 = (float)dst->ne[3] / src0->ne[3]; + float pixel_offset = 0.5f; CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra_src0->data_device)); CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &off_src0)); @@ -4609,29 +4728,36 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb03)); if (mode == GGML_SCALE_MODE_NEAREST) { - CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne10_dst)); - CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne11_dst)); - CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne12_dst)); - CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne13_dst)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne0)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne1)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne2)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne3)); CL_CHECK(clSetKernelArg(kernel, 12, sizeof(float), &sf0)); CL_CHECK(clSetKernelArg(kernel, 13, sizeof(float), &sf1)); CL_CHECK(clSetKernelArg(kernel, 14, sizeof(float), &sf2)); CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf3)); } else if (mode == GGML_SCALE_MODE_BILINEAR) { - CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne00_src)); - CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne01_src)); - CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne10_dst)); - CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne11_dst)); - CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne12_dst)); - CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne13_dst)); + if (mode_flags & GGML_SCALE_FLAG_ALIGN_CORNERS) { + sf0 = (float)(ne0 - 1) / (ne00 - 1); + sf1 = (float)(ne1 - 1) / (ne01 - 1); + 
pixel_offset = 0.0f; + } + + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne00)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(int), &ne01)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne0)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne1)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(int), &ne2)); + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &ne3)); CL_CHECK(clSetKernelArg(kernel, 14, sizeof(float), &sf0)); CL_CHECK(clSetKernelArg(kernel, 15, sizeof(float), &sf1)); CL_CHECK(clSetKernelArg(kernel, 16, sizeof(float), &sf2)); CL_CHECK(clSetKernelArg(kernel, 17, sizeof(float), &sf3)); + CL_CHECK(clSetKernelArg(kernel, 18, sizeof(float), &pixel_offset)); } - size_t dst_total_elements = (size_t)ne10_dst * ne11_dst * ne12_dst * ne13_dst; + size_t dst_total_elements = (size_t)ne0 * ne1 * ne2 * ne3; if (dst_total_elements == 0) { return; } @@ -4644,17 +4770,7 @@ static void ggml_cl_upscale(ggml_backend_t backend, const ggml_tensor * src0, gg local_work_size_ptr = nullptr; } -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - size_t profiling_gws[3] = {global_work_size[0], 1, 1}; - size_t profiling_lws[3] = {local_work_size_ptr ? 
local_work_size[0] : 0, 1, 1}; - populateProfilingInfo(g_profiling_info.back(), evt, kernel, profiling_gws, profiling_lws, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 1, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_concat(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4732,7 +4848,7 @@ static void ggml_cl_concat(ggml_backend_t backend, const ggml_tensor * src0, con global_work_size[1] = d_ne1; global_work_size[2] = d_ne2; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, NULL, 0, NULL, NULL)); + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, NULL, dst); } } } else { @@ -4782,7 +4898,7 @@ static void ggml_cl_concat(ggml_backend_t backend, const ggml_tensor * src0, con d_ne2 > 0 ? (size_t)d_ne2 : 1, d_ne3 > 0 ? (size_t)d_ne3 : 1 }; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size_nc, NULL, 0, NULL, NULL)); + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size_nc, NULL, dst); } } @@ -4795,7 +4911,6 @@ static void ggml_cl_timestep_embedding(ggml_backend_t backend, const ggml_tensor GGML_ASSERT(dst->type == GGML_TYPE_F32); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; if (backend_ctx->kernel_timestep_embedding == nullptr) { GGML_LOG_WARN("%s: timestep_embedding kernel not available, skipping OpenCL execution.\n", __func__); @@ -4828,17 +4943,59 @@ static void ggml_cl_timestep_embedding(ggml_backend_t backend, const ggml_tensor size_t global_work_size[] = {gws0, gws1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_work_size, NULL, 0, NULL, &evt)); // Pass 2 for 2D problem + backend_ctx->enqueue_ndrange_kernel(kernel, 3, 
global_work_size, NULL, dst); +} - g_profiling_info.emplace_back(); - size_t profiling_gws[3] = {global_work_size[0], global_work_size[1], 1}; - size_t profiling_lws[3] = {0,0,0}; // Reflects NULL LWS - populateProfilingInfo(g_profiling_info.back(), evt, kernel, profiling_gws, profiling_lws, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_work_size, NULL, 0, NULL, NULL)); // Pass 2 for 2D problem -#endif +static void ggml_cl_mul_mat_f16_f32_tiled(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; + + ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; + ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; + ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; + + cl_ulong offset0 = extra0->offset + src0->view_offs; + cl_ulong offset1 = extra1->offset + src1->view_offs; + cl_ulong offsetd = extrad->offset + dst->view_offs; + + const int M = src0->ne[1]; + const int N = src1->ne[1]; + const int K = src0->ne[0]; + + cl_kernel kernel = backend_ctx->kernel_mul_mat_f16_f32_tiled; + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(int), &M)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(int), &N)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(int), &K)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_mem), &extra0->data_device)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_ulong), &offset0)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_mem), &extra1->data_device)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_ulong), &offset1)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &offsetd)); + + // Tiling parameters. These need to be tuned for optimal performance. + // They must match the #defines in the kernel mul_mat_f16_f32.cl. 
+ // + // OPWM / OPWN: Output tile size per Work-Group. A work-group computes a tile of size OPWM x OPWN. + // TPWM / TPWN: Threads per Work-group. This is the work-group size. + // OPTM / OPTN: Output elements per Thread. Each thread computes OPTM x OPTN elements. + // + // The following relationships must hold: + // OPWM = TPWM * OPTM + // OPWN = TPWN * OPTN + // + const int OPWM = 64; + const int OPWN = 64; + const int TPWM = 16; + const int TPWN = 8; + + size_t local_work_size[2] = { TPWM, TPWN }; + size_t global_work_size[2] = { + (size_t) ((M + OPWM - 1) / OPWM) * TPWM, + (size_t) ((N + OPWN - 1) / OPWN) * TPWN, + }; + + backend_ctx->enqueue_ndrange_kernel(kernel, 2, global_work_size, local_work_size, dst); } static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -4853,7 +5010,18 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co const enum ggml_type src1t = src1 ? src1->type : GGML_TYPE_COUNT; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; + + if (src0t == GGML_TYPE_F16 && src1t == GGML_TYPE_F32 && + src0->ne[1] > 32 && // M > 32 + src1->ne[1] > 32 && // N > 32 + src0->ne[0] > 32 && // K > 32 + src0->ne[2] == 1 && src0->ne[3] == 1 && + src1->ne[2] == 1 && src1->ne[3] == 1 && + ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && + backend_ctx->kernel_mul_mat_f16_f32_tiled != NULL) { + ggml_cl_mul_mat_f16_f32_tiled(backend, src0, src1, dst); + return; + } ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -5058,15 +5226,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co static_cast(padded_height_B) }; - #ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_size_t, local_size_t, 0, 
NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_size_t, local_size_t, dst); - #else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 2, NULL, global_size_t, local_size_t, 0, NULL, NULL)); - #endif + backend_ctx->enqueue_ndrange_kernel(kernel, 2, global_size_t, local_size_t, dst); } else { // no need to transpose B in other cases // create an image for B from sub_buffer @@ -5188,16 +5348,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co // enqueue kernel with profiling // <--------------------------------------------> // - #ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); - // enqueue kernel without profiling - #else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); - #endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); // <--------------------------------------------> // // deallocate sub buffers and images @@ -5277,15 +5428,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co global_work_size[2] = (size_t)ne12*ne13; } -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); return; } #else // GGML_OPENCL_SOA_Q @@ -5515,15 +5658,7 @@ 
static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co size_t global_work_size[] = {(size_t)(ne01 + ndst-1)/ndst*nth0, (size_t)ne11*nth1, (size_t)ne12*ne13}; size_t local_work_size[] = {(size_t)nth0, (size_t)nth1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } else if (src0t == GGML_TYPE_Q4_K) { GGML_ASSERT(false && "not implemented"); } else if (src0t == GGML_TYPE_Q3_K) { @@ -5534,30 +5669,14 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co size_t global_work_size[] = {(size_t)(ne01+1)/2*nth0, (size_t)ne11*nth1, (size_t)ne12*ne13}; size_t local_work_size[] = {(size_t)nth0, (size_t)nth1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } else { int64_t ny = (ne11 + nrows - 1)/nrows; size_t global_work_size[] = {(size_t)ne01*nth0, (size_t)ny*nth1, (size_t)ne12*ne13}; size_t local_work_size[] = {(size_t)nth0, (size_t)nth1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - 
g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } } @@ -5574,7 +5693,6 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0, GGML_ASSERT(src2->extra); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; ggml_tensor_extra_cl * extra2 = (ggml_tensor_extra_cl *)src2->extra; @@ -5680,15 +5798,7 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0, size_t global_work_size[] = {(size_t)(ne01+ndst*nsg-1)/(ndst*nsg)*sgs, (size_t)(_ne1+nrows-1)/nrows*nsg, (size_t)ne123}; size_t local_work_size[] = {(size_t)sgs, (size_t)nsg, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_scale(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -5701,10 +5811,11 @@ static void ggml_cl_scale(ggml_backend_t backend, const ggml_tensor * src0, cons GGML_ASSERT(ggml_is_contiguous(src0)); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; float scale; - memcpy(&scale, 
dst->op_params, sizeof(scale)); + float bias; + memcpy(&scale, ((int32_t *) dst->op_params) + 0, sizeof(float)); + memcpy(&bias, ((int32_t *) dst->op_params) + 1, sizeof(float)); ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -5719,6 +5830,7 @@ static void ggml_cl_scale(ggml_backend_t backend, const ggml_tensor * src0, cons CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extrad->data_device)); CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offsetd)); CL_CHECK(clSetKernelArg(kernel, 4, sizeof(float), &scale)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(float), &bias)); int n = ggml_nelements(dst)/4; @@ -5730,15 +5842,7 @@ static void ggml_cl_scale(ggml_backend_t backend, const ggml_tensor * src0, cons local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. } -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } static void ggml_cl_cpy(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -5775,7 +5879,6 @@ static void ggml_cl_cpy(ggml_backend_t backend, const ggml_tensor * src0, const const enum ggml_type src1t = src1 ? 
src1->type : GGML_TYPE_COUNT; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -5840,15 +5943,7 @@ static void ggml_cl_cpy(ggml_backend_t backend, const ggml_tensor * src0, const size_t global_work_size[] = {(size_t)ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {(size_t)nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, src1); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, src1); } static void ggml_cl_dup(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -5871,7 +5966,6 @@ static void ggml_cl_diag_mask_inf(ggml_backend_t backend, const ggml_tensor * sr const int ne02 = src0 ? 
src0->ne[2] : 0; ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -5895,15 +5989,7 @@ static void ggml_cl_diag_mask_inf(ggml_backend_t backend, const ggml_tensor * sr size_t global_work_size[] = {(size_t)ne00*ne01*ne02/8, 1, 1}; size_t local_work_size[] = {64, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } else { kernel = backend_ctx->kernel_diag_mask_inf; @@ -5923,15 +6009,7 @@ static void ggml_cl_diag_mask_inf(ggml_backend_t backend, const ggml_tensor * sr local_work_size_ptr = nullptr; // Let driver choose the work-group sizes. 
} -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size_ptr, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size_ptr, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size_ptr, dst); } } @@ -5951,7 +6029,6 @@ static void ggml_cl_soft_max(ggml_backend_t backend, const ggml_tensor * src0, c } ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -5963,19 +6040,31 @@ static void ggml_cl_soft_max(ggml_backend_t backend, const ggml_tensor * src0, c cl_ulong offset1 = extra1 ? extra1->offset + src1->view_offs : offset0; - const int ne00 = src0 ? src0->ne[0] : 0; - const int ne01 = src0 ? src0->ne[1] : 0; - const int ne02 = src0 ? src0->ne[2] : 0; - const int ne03 = src0 ? src0->ne[3] : 0; + const int ne00 = src0->ne[0]; + const int ne01 = src0->ne[1]; + const int ne02 = src0->ne[2]; + const int ne03 = src0->ne[3]; + + const cl_long nb01 = src0->nb[1]; + const cl_long nb02 = src0->nb[2]; + const cl_long nb03 = src0->nb[3]; + + const int ne12 = src1 ? src1->ne[2] : 0; + const int ne13 = src1 ? src1->ne[3] : 0; + + const cl_long nb11 = src1 ? src1->nb[1] : 0; + const cl_long nb12 = src1 ? src1->nb[2] : 0; + const cl_long nb13 = src1 ? 
src1->nb[3] : 0; + + const cl_long nb1 = dst->nb[1]; + const cl_long nb2 = dst->nb[2]; + const cl_long nb3 = dst->nb[3]; float scale, max_bias; memcpy(&scale, dst->op_params + 0, sizeof(float)); memcpy(&max_bias, dst->op_params + 1, sizeof(float)); - const int nrows_x = ggml_nrows(src0); - const int nrows_y = src0->ne[1]; - - const int n_head = nrows_x/nrows_y; + const int n_head = src0->ne[2]; const int n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head)); const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); @@ -6020,26 +6109,27 @@ static void ggml_cl_soft_max(ggml_backend_t backend, const ggml_tensor * src0, c CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &extrad->data_device)); CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &offsetd)); CL_CHECK(clSetKernelArg(kernel, 6, sizeof(int), &ne00)); - CL_CHECK(clSetKernelArg(kernel, 7, sizeof(int), &ne01)); - CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne02)); - CL_CHECK(clSetKernelArg(kernel, 9, sizeof(float), &scale)); - CL_CHECK(clSetKernelArg(kernel, 10, sizeof(float), &max_bias)); - CL_CHECK(clSetKernelArg(kernel, 11, sizeof(float), &m0)); - CL_CHECK(clSetKernelArg(kernel, 12, sizeof(float), &m1)); - CL_CHECK(clSetKernelArg(kernel, 13, sizeof(int), &n_head_log2)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb01)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &nb02)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_ulong), &nb03)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne12)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne13)); + CL_CHECK(clSetKernelArg(kernel, 12, sizeof(cl_ulong), &nb11)); + CL_CHECK(clSetKernelArg(kernel, 13, sizeof(cl_ulong), &nb12)); + CL_CHECK(clSetKernelArg(kernel, 14, sizeof(cl_ulong), &nb13)); + CL_CHECK(clSetKernelArg(kernel, 15, sizeof(cl_ulong), &nb1)); + CL_CHECK(clSetKernelArg(kernel, 16, sizeof(cl_ulong), &nb2)); + CL_CHECK(clSetKernelArg(kernel, 17, sizeof(cl_ulong), &nb3)); + CL_CHECK(clSetKernelArg(kernel, 18, 
sizeof(float), &scale)); + CL_CHECK(clSetKernelArg(kernel, 19, sizeof(float), &max_bias)); + CL_CHECK(clSetKernelArg(kernel, 20, sizeof(float), &m0)); + CL_CHECK(clSetKernelArg(kernel, 21, sizeof(float), &m1)); + CL_CHECK(clSetKernelArg(kernel, 22, sizeof(int), &n_head_log2)); size_t global_work_size[] = {(size_t)ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {(size_t)nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_rope(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -6051,7 +6141,6 @@ static void ggml_cl_rope(ggml_backend_t backend, const ggml_tensor * src0, const GGML_ASSERT(dst->extra); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; @@ -6217,15 +6306,7 @@ static void ggml_cl_rope(ggml_backend_t backend, const ggml_tensor * src0, const size_t global_work_size[] = {(size_t)ne01*nth, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {(size_t)nth, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - 
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_im2col(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -6240,7 +6321,6 @@ static void ggml_cl_im2col(ggml_backend_t backend, const ggml_tensor * src0, con GGML_ASSERT(dst->type == GGML_TYPE_F16 || dst->type == GGML_TYPE_F32); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra1 = (ggml_tensor_extra_cl *)src1->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -6309,15 +6389,7 @@ static void ggml_cl_im2col(ggml_backend_t backend, const ggml_tensor * src0, con size_t global_work_size[] = {(size_t)num_blocks*256, (size_t)OH, (size_t)batch*IC}; size_t local_work_size[] = {256, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_argsort(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -6332,7 +6404,6 @@ static void ggml_cl_argsort(ggml_backend_t backend, const ggml_tensor * src0, co GGML_ASSERT(ggml_is_contiguous(src0)); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; 
ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -6364,15 +6435,7 @@ static void ggml_cl_argsort(ggml_backend_t backend, const ggml_tensor * src0, co size_t global_work_size[] = {(size_t)ne00_padded, (size_t)nrows, (size_t)1}; size_t local_work_size[] = {(size_t)ne00_padded, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); - - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } static void ggml_cl_sum_rows(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { @@ -6386,7 +6449,6 @@ static void ggml_cl_sum_rows(ggml_backend_t backend, const ggml_tensor * src0, c GGML_ASSERT(ggml_is_contiguous(src0)); ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; - cl_command_queue queue = backend_ctx->queue; ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; @@ -6427,15 +6489,106 @@ static void ggml_cl_sum_rows(ggml_backend_t backend, const ggml_tensor * src0, c size_t global_work_size[] = {(size_t)ne01, (size_t)ne02, (size_t)ne03}; size_t local_work_size[] = {(size_t)64, 1, 1}; -#ifdef GGML_OPENCL_PROFILING - cl_event evt; - CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, &evt)); + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); +} - g_profiling_info.emplace_back(); - populateProfilingInfo(g_profiling_info.back(), evt, kernel, global_work_size, local_work_size, dst); -#else - 
CL_CHECK(clEnqueueNDRangeKernel(queue, kernel, 3, NULL, global_work_size, local_work_size, 0, NULL, NULL)); -#endif +static void ggml_cl_glu(ggml_backend_t backend, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { + GGML_ASSERT(src0); + GGML_ASSERT(src0->extra); + GGML_ASSERT(dst); + GGML_ASSERT(dst->extra); + + GGML_ASSERT(ggml_is_contiguous_1(src0)); + + if (src1) { + GGML_ASSERT(src1); + GGML_ASSERT(src1->extra); + GGML_ASSERT(ggml_are_same_shape(src0, src1)); + } + + ggml_backend_opencl_context *backend_ctx = (ggml_backend_opencl_context *)backend->context; + + cl_kernel kernel; + switch (ggml_get_glu_op(dst)) { + case GGML_GLU_OP_GEGLU: + if (dst->type == GGML_TYPE_F32) { + kernel = backend_ctx->kernel_geglu; + } else { + kernel = backend_ctx->kernel_geglu_f16; + } + break; + case GGML_GLU_OP_REGLU: + if (dst->type == GGML_TYPE_F32) { + kernel = backend_ctx->kernel_reglu; + } else { + kernel = backend_ctx->kernel_reglu_f16; + } + break; + case GGML_GLU_OP_SWIGLU: + if (dst->type == GGML_TYPE_F32) { + kernel = backend_ctx->kernel_swiglu; + } else { + kernel = backend_ctx->kernel_swiglu_f16; + } + break; + case GGML_GLU_OP_GEGLU_ERF: + if (dst->type == GGML_TYPE_F32) { + kernel = backend_ctx->kernel_geglu_erf; + } else { + kernel = backend_ctx->kernel_geglu_erf_f16; + } + break; + case GGML_GLU_OP_GEGLU_QUICK: + if (dst->type == GGML_TYPE_F32) { + kernel = backend_ctx->kernel_geglu_quick; + } else { + kernel = backend_ctx->kernel_geglu_quick_f16; + } + break; + default: + GGML_ABORT("Unsupported glu op"); + } + + ggml_tensor_extra_cl * extra0 = (ggml_tensor_extra_cl *)src0->extra; + ggml_tensor_extra_cl * extrad = (ggml_tensor_extra_cl *)dst->extra; + + ggml_tensor_extra_cl * extra1 = src1 ? (ggml_tensor_extra_cl *)src1->extra : nullptr; + + cl_ulong offset0 = extra0->offset + src0->view_offs; + cl_ulong offsetd = extrad->offset + dst->view_offs; + + cl_ulong offset1 = extra1 ? 
extra1->offset + src1->view_offs : offset0; + + const int ne0 = dst->ne[0]; + + const cl_ulong nb01 = src0->nb[1]; + const cl_ulong nb11 = src1 ? src1->nb[1] : nb01; + + const cl_ulong nb1 = dst->nb[1]; + + const int swp = ((const int32_t *) dst->op_params)[1]; + const int ne00_off = src1 ? 0 : (swp ? ne0 : 0); + const int ne10_off = src1 ? 0 : (swp ? 0 : ne0); + + CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0->data_device)); + CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_ulong), &offset0)); + CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), src1 ? &extra1->data_device : &extra0->data_device)); + CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offset1)); + CL_CHECK(clSetKernelArg(kernel, 4, sizeof(cl_mem), &extrad->data_device)); + CL_CHECK(clSetKernelArg(kernel, 5, sizeof(cl_ulong), &offsetd)); + CL_CHECK(clSetKernelArg(kernel, 6, sizeof(cl_ulong), &nb01)); + CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_ulong), &nb11)); + CL_CHECK(clSetKernelArg(kernel, 8, sizeof(int), &ne0)); + CL_CHECK(clSetKernelArg(kernel, 9, sizeof(cl_ulong), &nb1)); + CL_CHECK(clSetKernelArg(kernel, 10, sizeof(int), &ne00_off)); + CL_CHECK(clSetKernelArg(kernel, 11, sizeof(int), &ne10_off)); + + const size_t nrows = ggml_nrows(src0); + size_t nth = 512; + size_t global_work_size[] = {nrows*nth, 1, 1}; + size_t local_work_size[] = {nth, 1, 1}; + + backend_ctx->enqueue_ndrange_kernel(kernel, 3, global_work_size, local_work_size, dst); } //------------------------------------------------------------------------------ @@ -6461,6 +6614,12 @@ bool ggml_cl_compute_forward(ggml_backend_t backend, struct ggml_tensor * tensor } func = ggml_cl_get_rows; break; + case GGML_OP_SET_ROWS: + if (!any_on_device) { + return false; + } + func = ggml_cl_set_rows; + break; case GGML_OP_CPY: if (!any_on_device) { return false; @@ -6506,6 +6665,12 @@ bool ggml_cl_compute_forward(ggml_backend_t backend, struct ggml_tensor * tensor } func = ggml_cl_gelu; break; + case GGML_UNARY_OP_GELU_ERF: + 
if (!any_on_device) { + return false; + } + func = ggml_cl_gelu_erf; + break; case GGML_UNARY_OP_GELU_QUICK: if (!any_on_device) { return false; @@ -6539,6 +6704,12 @@ bool ggml_cl_compute_forward(ggml_backend_t backend, struct ggml_tensor * tensor default: return false; } break; + case GGML_OP_GLU: + if (!any_on_device) { + return false; + } + func = ggml_cl_glu; + break; case GGML_OP_CLAMP: if (!any_on_device) { return false; diff --git a/ggml/src/ggml-opencl/kernels/gelu.cl b/ggml/src/ggml-opencl/kernels/gelu.cl index 71c310cc9f986..1ab426c774452 100644 --- a/ggml/src/ggml-opencl/kernels/gelu.cl +++ b/ggml/src/ggml-opencl/kernels/gelu.cl @@ -6,6 +6,7 @@ #define GELU_COEF_A 0.044715f #define GELU_QUICK_COEF -1.702f #define SQRT_2_OVER_PI 0.79788456080286535587989211986876f +#define SQRT_2_INV 0.70710678118654752440084436210484f kernel void kernel_gelu( global float * src0, @@ -35,6 +36,32 @@ kernel void kernel_gelu_4( dst[get_global_id(0)] = 0.5f*x*(1.0f + tanh(SQRT_2_OVER_PI*x*(1.0f + GELU_COEF_A*x*x))); } /* kernel_gelu_erf: element-wise erf-based GELU on floats. offset0 and offsetd are byte offsets added to src0 and dst. Each work-item reads x at index get_global_id(0) and writes 0.5*x*(1 + erf(x*SQRT_2_INV)) to the same index of dst. There is no bounds check in the kernel, so the launch size has to match the element count. */ +kernel void kernel_gelu_erf( + global float * src0, + ulong offset0, + global float * dst, + ulong offsetd +) { + src0 = (global float*)((global char*)src0 + offset0); + dst = (global float*)((global char*)dst + offsetd); + + float x = src0[get_global_id(0)]; + dst[get_global_id(0)] = 0.5f*x*(1.0f + erf(x*SQRT_2_INV)); +} + /* kernel_gelu_erf_4: float4 variant of kernel_gelu_erf. Each work-item loads one float4 at index get_global_id(0) and applies the same formula to its four components. */ +kernel void kernel_gelu_erf_4( + global float4 * src0, + ulong offset0, + global float4 * dst, + ulong offsetd +) { + src0 = (global float4*)((global char*)src0 + offset0); + dst = (global float4*)((global char*)dst + offsetd); + + float4 x = src0[get_global_id(0)]; + dst[get_global_id(0)] = 0.5f*x*(1.0f + erf(x*SQRT_2_INV)); +} + kernel void kernel_gelu_quick( global float * src0, ulong offset0, diff --git a/ggml/src/ggml-opencl/kernels/glu.cl b/ggml/src/ggml-opencl/kernels/glu.cl new file mode 100644 index 0000000000000..7cca16e6a9e7e --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/glu.cl @@ -0,0 +1,337 @@ +#pragma OPENCL EXTENSION
cl_khr_fp16 : enable + +#define GELU_COEF_A 0.044715f +#define GELU_QUICK_COEF -1.702f +#define SQRT_2_OVER_PI 0.79788456080286535587989211986876f +#define SQRT_2_INV 0.70710678118654752440084436210484f + +//------------------------------------------------------------------------------ +// geglu +//------------------------------------------------------------------------------ /* kernel_geglu: f32 GEGLU. offset0, offset1 and offsetd are byte offsets applied to the three buffers. Work-group g handles row g: the input rows start at byte offset g*nb01 and g*nb11 and are then advanced by ne00_off and ne10_off float elements, and the output row starts at byte offset g*nb1. Work-items stride over i0 in [0, ne0) by the local size and write gelu(x0)*x1, with gelu computed by the tanh approximation. */ +kernel void kernel_geglu( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global float * src0_row = (global float *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global float * src1_row = (global float *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global float * dst_row = (global float *) ((global char *) dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float gelu = 0.5f*x0*(1.0f + tanh(SQRT_2_OVER_PI*x0*(1.0f + GELU_COEF_A*x0*x0))); + + dst_row[i0] = gelu*x1; + } +} + /* kernel_geglu_f16: half-precision variant of kernel_geglu. Same arguments and row addressing, but rows are read and written as half, so ne00_off and ne10_off count half elements. */ +kernel void kernel_geglu_f16( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global half * src0_row = (global half *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global half * src1_row = (global half *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global half * dst_row = (global half *) ((global char *)
dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const half x0 = src0_row[i0]; + const half x1 = src1_row[i0]; + + const half gelu = 0.5f*x0*(1.0f + tanh(SQRT_2_OVER_PI*x0*(1.0f + GELU_COEF_A*x0*x0))); + + dst_row[i0] = gelu*x1; + } +} + +//------------------------------------------------------------------------------ +// reglu +//------------------------------------------------------------------------------ /* kernel_reglu: f32 ReGLU. offset0, offset1 and offsetd are byte offsets applied to the three buffers. Work-group g handles row g: the input rows start at byte offset g*nb01 and g*nb11 and are then advanced by ne00_off and ne10_off float elements, and the output row starts at byte offset g*nb1. Work-items stride over i0 in [0, ne0) by the local size and write x0*x1*(x0 > 0.0f), i.e. the product multiplied by the result of the comparison. */ +kernel void kernel_reglu( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global float * src0_row = (global float *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global float * src1_row = (global float *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global float * dst_row = (global float *) ((global char *) dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + dst_row[i0] = x0*x1*(x0 > 0.0f); + } +} + /* kernel_reglu_f16: half-precision variant of kernel_reglu. Same arguments and row addressing, but rows are read and written as half, so ne00_off and ne10_off count half elements. */ +kernel void kernel_reglu_f16( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global half * src0_row = (global half *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global half * src1_row = (global half *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global half * dst_row = (global half *) ((global char
*) dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const half x0 = src0_row[i0]; + const half x1 = src1_row[i0]; + + dst_row[i0] = x0*x1*(x0 > 0.0f); + } +} + +//------------------------------------------------------------------------------ +// swiglu +//------------------------------------------------------------------------------ /* kernel_swiglu: f32 SwiGLU. offset0, offset1 and offsetd are byte offsets applied to the three buffers. Work-group g handles row g: the input rows start at byte offset g*nb01 and g*nb11 and are then advanced by ne00_off and ne10_off float elements, and the output row starts at byte offset g*nb1. Work-items stride over i0 in [0, ne0) by the local size and write silu(x0)*x1 with silu(x0) = x0 / (1 + exp(-x0)). */ +kernel void kernel_swiglu( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global float * src0_row = (global float *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global float * src1_row = (global float *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global float * dst_row = (global float *) ((global char *) dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float silu = x0 / (1.0f + exp(-x0)); + + dst_row[i0] = silu*x1; + } +} + /* kernel_swiglu_f16: half-precision variant of kernel_swiglu. Same arguments and row addressing, but rows are read and written as half, so ne00_off and ne10_off count half elements. */ +kernel void kernel_swiglu_f16( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global half * src0_row = (global half *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global half * src1_row = (global half *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global half * dst_row = (global half *) ((global char *) dst + get_group_id(0)*nb1); + + for
(int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const half x0 = src0_row[i0]; + const half x1 = src1_row[i0]; + + const half silu = x0 / (1.0f + exp(-x0)); + + dst_row[i0] = silu*x1; + } +} + +//------------------------------------------------------------------------------ +// geglu_erf +//------------------------------------------------------------------------------ /* kernel_geglu_erf: f32 GEGLU using the erf form of GELU. offset0, offset1 and offsetd are byte offsets applied to the three buffers. Work-group g handles row g: the input rows start at byte offset g*nb01 and g*nb11 and are then advanced by ne00_off and ne10_off float elements, and the output row starts at byte offset g*nb1. Work-items stride over i0 in [0, ne0) by the local size and write 0.5*x0*(1 + erf(x0*SQRT_2_INV))*x1. */ +kernel void kernel_geglu_erf( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global float * src0_row = (global float *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global float * src1_row = (global float *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global float * dst_row = (global float *) ((global char *) dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float gelu_erf = 0.5f*x0*(1.0f + erf(x0*SQRT_2_INV)); + + dst_row[i0] = gelu_erf*x1; + } +} + /* kernel_geglu_erf_f16: half-precision variant of kernel_geglu_erf. Same arguments and row addressing, but rows are read and written as half, so ne00_off and ne10_off count half elements. */ +kernel void kernel_geglu_erf_f16( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global half * src0_row = (global half *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global half * src1_row = (global half *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global half * dst_row = (global half *) ((global char *) dst +
get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const half x0 = src0_row[i0]; + const half x1 = src1_row[i0]; + + const half gelu_erf = 0.5f*x0*(1.0f + erf(x0*SQRT_2_INV)); + + dst_row[i0] = gelu_erf*x1; + } +} + +//------------------------------------------------------------------------------ +// geglu_quick +//------------------------------------------------------------------------------ /* kernel_geglu_quick: f32 GEGLU using the quick GELU form. offset0, offset1 and offsetd are byte offsets applied to the three buffers. Work-group g handles row g: the input rows start at byte offset g*nb01 and g*nb11 and are then advanced by ne00_off and ne10_off float elements, and the output row starts at byte offset g*nb1. Work-items stride over i0 in [0, ne0) by the local size and write x0*(1/(1 + exp(GELU_QUICK_COEF*x0)))*x1. */ +kernel void kernel_geglu_quick( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global float * src0_row = (global float *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global float * src1_row = (global float *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off; + global float * dst_row = (global float *) ((global char *) dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const float x0 = src0_row[i0]; + const float x1 = src1_row[i0]; + + const float gelu_quick = x0*(1.0f/(1.0f + exp(GELU_QUICK_COEF*x0))); + + dst_row[i0] = gelu_quick*x1; + } +} + /* kernel_geglu_quick_f16: half-precision variant of kernel_geglu_quick. Same arguments and row addressing, but rows are read and written as half, so ne00_off and ne10_off count half elements. */ +kernel void kernel_geglu_quick_f16( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + ulong nb01, + ulong nb11, + int ne0, + ulong nb1, + int ne00_off, + int ne10_off +) { + src0 = (global char*)((global char*)src0 + offset0); + src1 = (global char*)((global char*)src1 + offset1); + dst = (global char*)((global char*)dst + offsetd); + + global half * src0_row = (global half *) ((global char *) src0 + get_group_id(0)*nb01) + ne00_off; + global half * src1_row = (global half *) ((global char *) src1 + get_group_id(0)*nb11) + ne10_off;
+ global half * dst_row = (global half *) ((global char *) dst + get_group_id(0)*nb1); + + for (int i0 = get_local_id(0); i0 < ne0; i0 += get_local_size(0)) { + const half x0 = src0_row[i0]; + const half x1 = src1_row[i0]; + + const half gelu_quick = x0*(1.0f/(1.0f + exp(GELU_QUICK_COEF*x0))); + + dst_row[i0] = gelu_quick*x1; + } +} diff --git a/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl b/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl new file mode 100644 index 0000000000000..73a888494dccf --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/mul_mat_f16_f32.cl @@ -0,0 +1,130 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + +#if defined(cl_qcom_reqd_sub_group_size) +#pragma OPENCL EXTENSION cl_qcom_reqd_sub_group_size : enable +#define REQD_SUBGROUP_SIZE_128 __attribute__((qcom_reqd_sub_group_size("full"))) +#else +#define REQD_SUBGROUP_SIZE_128 +#endif + +#define OPWM 64 +#define OPWN 64 +#define CPWK 8 +#define OPTM 4 +#define OPTN 8 + +#define WG_M (OPWM / OPTM) +#define WG_N (OPWN / OPTN) +#define VEC_K (CPWK / 4) + /* mul_mat_f16_f32: tiled matrix multiply with half A and float B, accumulating in float. A is read as A[row*K + k] for row < M, B as B[row*K + k] for row < N, and results are stored as C[col*M + row]. A_offset, B_offset and C_offset are byte offsets. Each work-group produces an OPWM x OPWN output tile and walks K in steps of CPWK, staging the A and B slices in local memory; each work-item keeps OPTM x OPTN partial sums. Rows outside M or N and the tail of K are filled with zeros in the staged tiles. NOTE(review): the cooperative load indexing (lid % OPWM, lid / OPWM) appears to assume a WG_M x WG_N work-group, i.e. 128 work-items - confirm against the host launch code. */ +REQD_SUBGROUP_SIZE_128 +__kernel void mul_mat_f16_f32( + const int M, const int N, const int K, + __global const void* A_void, ulong A_offset, + __global const void* B_void, ulong B_offset, + __global void* C_void, ulong C_offset) { + + __global const half* A = (__global const half* )((__global const char*)A_void + A_offset); + __global const float* B = (__global const float*)((__global const char*)B_void + B_offset); + __global float* C = (__global float*)((__global char*)C_void + C_offset); + + const int lidm = get_local_id(0); + const int lidn = get_local_id(1); + const int lid = lidn * WG_M + lidm; + + const int offsetM = get_group_id(0) * OPWM; + const int offsetN = get_group_id(1) * OPWN; + + __local half4 Alocal[OPWM][VEC_K]; + __local float4 Blocal[OPWN][VEC_K]; + + float sum[OPTM][OPTN]; + + for (int wm = 0; wm < OPTM; wm++) { + for (int wn = 0; wn < OPTN; wn++) { + sum[wm][wn] = 0.0f; + } + } + + const int numTiles = (K + CPWK - 1) /
CPWK; + /* Each work-item stages one 4-wide vector of A and one of B per tile: lid selects the tile row and which vector of the CPWK slice it loads. */ + const int load_row_a = lid % OPWM; + const int load_vec_k_a = lid / OPWM; + const int global_row_a = offsetM + load_row_a; + + const int load_row_b = lid % OPWN; + const int load_vec_k_b = lid / OPWN; + const int global_row_b = offsetN + load_row_b; + + for (int t = 0; t < numTiles; t++) { + const int k_start = t * CPWK; + const int k_vec_start_a = k_start + load_vec_k_a * 4; + const int k_vec_start_b = k_start + load_vec_k_b * 4; + /* Full vectors are fetched with vload4; a vector that straddles the end of K is assembled element by element on top of zeros, and out-of-range rows are staged as zeros. */ + if (global_row_a < M && k_vec_start_a < K) { + if (k_vec_start_a + 3 < K) { + Alocal[load_row_a][load_vec_k_a] = vload4(0, A + global_row_a * K + k_vec_start_a); + } else { + half4 tempA = (half4)(0.0h); + if (k_vec_start_a < K) tempA.s0 = A[global_row_a * K + k_vec_start_a]; + if (k_vec_start_a + 1 < K) tempA.s1 = A[global_row_a * K + k_vec_start_a + 1]; + if (k_vec_start_a + 2 < K) tempA.s2 = A[global_row_a * K + k_vec_start_a + 2]; + Alocal[load_row_a][load_vec_k_a] = tempA; + } + } else { + Alocal[load_row_a][load_vec_k_a] = (half4)(0.0h); + } + + if (global_row_b < N && k_vec_start_b < K) { + if (k_vec_start_b + 3 < K) { + Blocal[load_row_b][load_vec_k_b] = vload4(0, B + global_row_b * K + k_vec_start_b); + } else { + float4 tempB = (float4)(0.0f); + if (k_vec_start_b < K) tempB.s0 = B[global_row_b * K + k_vec_start_b]; + if (k_vec_start_b + 1 < K) tempB.s1 = B[global_row_b * K + k_vec_start_b + 1]; + if (k_vec_start_b + 2 < K) tempB.s2 = B[global_row_b * K + k_vec_start_b + 2]; + Blocal[load_row_b][load_vec_k_b] = tempB; + } + } else { + Blocal[load_row_b][load_vec_k_b] = (float4)(0.0f); + } + /* Local-memory barrier: the staged tiles must be complete before any work-item reads them. */ + barrier(CLK_LOCAL_MEM_FENCE); + + #pragma unroll + for (int k_vec = 0; k_vec < VEC_K; k_vec++) { + float4 a_fvecs[OPTM]; + int current_row_a = lidm; + for (int wm = 0; wm < OPTM; wm++) { + a_fvecs[wm] = convert_float4(Alocal[current_row_a][k_vec]); + current_row_a += WG_M; + } + + float4 b_fvecs[OPTN]; + int current_row_b = lidn; + for (int wn = 0; wn < OPTN; wn++) { + b_fvecs[wn] = Blocal[current_row_b][k_vec]; + current_row_b
+= WG_N; + } + + for (int wm = 0; wm < OPTM; wm++) { + for (int wn = 0; wn < OPTN; wn++) { + sum[wm][wn] += dot(a_fvecs[wm], b_fvecs[wn]); + } + } + } /* Second barrier: nobody may overwrite the local tiles for the next step while another work-item is still reading them. */ + barrier(CLK_LOCAL_MEM_FENCE); + } + /* Write-back: each work-item stores its OPTM x OPTN sums, skipping rows >= M and columns >= N. */ + for (int wm = 0; wm < OPTM; wm++) { + int globalRow = offsetM + lidm + wm * WG_M; + if (globalRow < M) { + for (int wn = 0; wn < OPTN; wn++) { + int globalCol = offsetN + lidn + wn * WG_N; + if (globalCol < N) { + C[globalCol * M + globalRow] = sum[wm][wn]; + } + } + } + } +} diff --git a/ggml/src/ggml-opencl/kernels/scale.cl b/ggml/src/ggml-opencl/kernels/scale.cl index 8cfd518fa5a3e..aeca8a456e4fe 100644 --- a/ggml/src/ggml-opencl/kernels/scale.cl +++ b/ggml/src/ggml-opencl/kernels/scale.cl @@ -8,9 +8,10 @@ kernel void kernel_scale( ulong offset0, global float4 * dst, ulong offsetd, - float scale + float scale, + float bias ) { src0 = (global float4*)((global char*)src0 + offset0); dst = (global float4*)((global char*)dst + offsetd); - dst[get_global_id(0)] = src0[get_global_id(0)] * scale; + dst[get_global_id(0)] = src0[get_global_id(0)] * scale + bias; } diff --git a/ggml/src/ggml-opencl/kernels/set_rows.cl b/ggml/src/ggml-opencl/kernels/set_rows.cl new file mode 100644 index 0000000000000..a94b4361b4d33 --- /dev/null +++ b/ggml/src/ggml-opencl/kernels/set_rows.cl @@ -0,0 +1,95 @@ +#pragma OPENCL EXTENSION cl_khr_fp16 : enable + /* kernel_set_rows_f32: copies float rows of src0 into float rows of dst whose row index is read from src1. offset0, offset1 and offsetd are byte offsets; the nb arguments are byte strides. i03 and i02 come from get_group_id(2) and get_group_id(1); i01 is get_group_id(0)*get_local_size(1) + get_local_id(1), and work-items with i01 >= ne01 return early. The destination row i1 is loaded as a long from src1 at (i01, i02 % ne11, i03 % ne12). Work-items along dimension 0 stride over the nblk0 elements of the row. */ +kernel void kernel_set_rows_f32( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + int ne01, + ulong nb01, + ulong nb02, + ulong nb03, + int ne11, + int ne12, + ulong nb10, + ulong nb11, + ulong nb12, + int nblk0, + ulong nb1, + ulong nb2, + ulong nb3 +) { + src0 = src0 + offset0; + src1 = src1 + offset1; + dst = dst + offsetd; + + int i03 = get_group_id(2); + int i02 = get_group_id(1); + int i01 = get_group_id(0)*get_local_size(1) + get_local_id(1); + + if (i01 >= ne01) { + return; + } + + int i12 = i03%ne12; + int i11 = i02%ne11; + + int i10 = i01; + long i1 = ((global long
*)(src1 + i10*nb10 + i11*nb11 + i12*nb12))[0]; + + global float * dst_row = (global float *) (dst + i1*nb1 + i02*nb2 + i03*nb3); + global float * src_row = (global float *) (src0 + i01*nb01 + i02*nb02 + i03*nb03); + + for (int ind = get_local_id(0); ind < nblk0; ind += get_local_size(0)) { + dst_row[ind] = (float)src_row[ind]; + } +} + /* kernel_set_rows_f16: copies float rows of src0 into half rows of dst whose row index is read from src1; each float is converted to half by the store. offset0, offset1 and offsetd are byte offsets; the nb arguments are byte strides. i03 and i02 come from get_group_id(2) and get_group_id(1); i01 is get_group_id(0)*get_local_size(1) + get_local_id(1), and work-items with i01 >= ne01 return early. The destination row i1 is loaded as a long from src1 at (i01, i02 % ne11, i03 % ne12). Work-items along dimension 0 stride over the nblk0 elements of the row. */ +kernel void kernel_set_rows_f16( + global char * src0, + ulong offset0, + global char * src1, + ulong offset1, + global char * dst, + ulong offsetd, + int ne01, + ulong nb01, + ulong nb02, + ulong nb03, + int ne11, + int ne12, + ulong nb10, + ulong nb11, + ulong nb12, + int nblk0, + ulong nb1, + ulong nb2, + ulong nb3 +) { + src0 = src0 + offset0; + src1 = src1 + offset1; + dst = dst + offsetd; + + int i03 = get_group_id(2); + int i02 = get_group_id(1); + int i01 = get_group_id(0)*get_local_size(1) + get_local_id(1); + + if (i01 >= ne01) { + return; + } + + int i12 = i03%ne12; + int i11 = i02%ne11; + + int i10 = i01; + long i1 = ((global long *)(src1 + i10*nb10 + i11*nb11 + i12*nb12))[0]; + + global half * dst_row = (global half *) (dst + i1*nb1 + i02*nb2 + i03*nb3); + global float * src_row = (global float *) (src0 + i01*nb01 + i02*nb02 + i03*nb03); + + for (int ind = get_local_id(0); ind < nblk0; ind += get_local_size(0)) { + dst_row[ind] = src_row[ind]; + } +} diff --git a/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl b/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl index 62c05369a87b1..a6d8ede67010d 100644 --- a/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl +++ b/ggml/src/ggml-opencl/kernels/softmax_4_f16.cl @@ -22,32 +22,45 @@ REQD_SUBGROUP_SIZE_64 #endif kernel void kernel_soft_max_4_f16( - global float * src0, + global char * src0, ulong offset0, - global half * src1, + global char * src1, ulong offset1, - global float * dst, + global char * dst, ulong offsetd, int ne00, - int ne01, - int ne02, + ulong nb01, + ulong nb02, + ulong nb03, + int ne12, + int ne13, + ulong nb11, + ulong nb12, + ulong nb13, + ulong nb1, + ulong nb2,
+ ulong nb3, float scale, float max_bias, float m0, float m1, int n_head_log2 ) { - src0 = (global float *)((global char *)src0 + offset0); - src1 = (global half *)((global char *)src1 + offset1); - dst = (global float *)((global char *)dst + offsetd); + src0 = src0 + offset0; + src1 = src1 + offset1; + dst = dst + offsetd; int i03 = get_group_id(2); int i02 = get_group_id(1); int i01 = get_group_id(0); - global float4 * psrc4 = (global float4 *)(src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); - global half4 * pmask = (global char *)src1 != (global char *)src0 ? (global half4 *)(src1 + i01*ne00) : 0; - global float4 * pdst4 = (global float4 *)(dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); + int i13 = i03%ne13; + int i12 = i02%ne12; + int i11 = i01; + + global float4 * psrc4 = (global float4 *)(src0 + i01*nb01 + i02*nb02 + i03*nb03); + global half4 * pmask = src1 != src0 ? (global half4 *)(src1 + i11*nb11 + i12*nb12 + i13*nb13) : 0; + global float4 * pdst4 = (global float4 *)(dst + i01*nb1 + i02*nb2 + i03*nb3); float slope = 1.0f; diff --git a/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl b/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl index d562774eaba5e..35b5573b46a81 100644 --- a/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl +++ b/ggml/src/ggml-opencl/kernels/softmax_4_f32.cl @@ -22,32 +22,45 @@ REQD_SUBGROUP_SIZE_64 #endif kernel void kernel_soft_max_4( - global float * src0, + global char * src0, ulong offset0, - global float * src1, + global char * src1, ulong offset1, - global float * dst, + global char * dst, ulong offsetd, int ne00, - int ne01, - int ne02, + ulong nb01, + ulong nb02, + ulong nb03, + int ne12, + int ne13, + ulong nb11, + ulong nb12, + ulong nb13, + ulong nb1, + ulong nb2, + ulong nb3, float scale, float max_bias, float m0, float m1, int n_head_log2 ) { - src0 = (global float*)((global char*)src0 + offset0); - src1 = (global float*)((global char*)src1 + offset1); - dst = (global float*)((global char*)dst + offsetd); + src0 = 
src0 + offset0; + src1 = src1 + offset1; + dst = dst + offsetd; int i03 = get_group_id(2); int i02 = get_group_id(1); int i01 = get_group_id(0); - global float4 * psrc4 = (global float4 *)(src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); - global float4 * pmask = src1 != src0 ? (global float4 *)(src1 + i01*ne00) : 0; - global float4 * pdst4 = (global float4 *)(dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00); + int i13 = i03%ne13; + int i12 = i02%ne12; + int i11 = i01; + + global float4 * psrc4 = (global float4 *)(src0 + i01*nb01 + i02*nb02 + i03*nb03); + global float4 * pmask = src1 != src0 ? (global float4 *)(src1 + i11*nb11 + i12*nb12 + i13*nb13) : 0; + global float4 * pdst4 = (global float4 *)(dst + i01*nb1 + i02*nb2 + i03*nb3); float slope = 1.0f; diff --git a/ggml/src/ggml-opencl/kernels/softmax_f16.cl b/ggml/src/ggml-opencl/kernels/softmax_f16.cl index d38d099671ecf..9d292b57465a5 100644 --- a/ggml/src/ggml-opencl/kernels/softmax_f16.cl +++ b/ggml/src/ggml-opencl/kernels/softmax_f16.cl @@ -22,32 +22,45 @@ REQD_SUBGROUP_SIZE_64 #endif kernel void kernel_soft_max_f16( - global float * src0, + global char * src0, ulong offset0, - global half * src1, + global char * src1, ulong offset1, - global float * dst, + global char * dst, ulong offsetd, int ne00, - int ne01, - int ne02, + ulong nb01, + ulong nb02, + ulong nb03, + int ne12, + int ne13, + ulong nb11, + ulong nb12, + ulong nb13, + ulong nb1, + ulong nb2, + ulong nb3, float scale, float max_bias, float m0, float m1, int n_head_log2 ) { - src0 = (global float *)((global char *)src0 + offset0); - src1 = (global half *)((global char *)src1 + offset1); - dst = (global float *)((global char *)dst + offsetd); + src0 = src0 + offset0; + src1 = src1 + offset1; + dst = dst + offsetd; int i03 = get_group_id(2); int i02 = get_group_id(1); int i01 = get_group_id(0); - global float * psrc0 = src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; - global half * pmask = (global char *)src1 != (global char *)src0 ? 
src1 + i01*ne00 : 0; - global float * pdst = dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; + int i13 = i03%ne13; + int i12 = i02%ne12; + int i11 = i01; + + global float * psrc0 = (global float *)(src0 + i01*nb01 + i02*nb02 + i03*nb03); + global half * pmask = src1 != src0 ? (global half *)(src1 + i11*nb11 + i12*nb12 + i13*nb13) : 0; + global float * pdst = (global float *)(dst + i01*nb1 + i02*nb2 + i03*nb3); float slope = 1.0f; diff --git a/ggml/src/ggml-opencl/kernels/softmax_f32.cl b/ggml/src/ggml-opencl/kernels/softmax_f32.cl index 001b587abe31e..7c53dfbe5a27c 100644 --- a/ggml/src/ggml-opencl/kernels/softmax_f32.cl +++ b/ggml/src/ggml-opencl/kernels/softmax_f32.cl @@ -22,32 +22,45 @@ REQD_SUBGROUP_SIZE_64 #endif kernel void kernel_soft_max( - global float * src0, + global char * src0, ulong offset0, - global float * src1, + global char * src1, ulong offset1, - global float * dst, + global char * dst, ulong offsetd, int ne00, - int ne01, - int ne02, + ulong nb01, + ulong nb02, + ulong nb03, + int ne12, + int ne13, + ulong nb11, + ulong nb12, + ulong nb13, + ulong nb1, + ulong nb2, + ulong nb3, float scale, float max_bias, float m0, float m1, int n_head_log2 ) { - src0 = (global float*)((global char*)src0 + offset0); - src1 = (global float*)((global char*)src1 + offset1); - dst = (global float*)((global char*)dst + offsetd); + src0 = src0 + offset0; + src1 = src1 + offset1; + dst = dst + offsetd; int i03 = get_group_id(2); int i02 = get_group_id(1); int i01 = get_group_id(0); - global float * psrc0 = src0 + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; - global float * pmask = src1 != src0 ? src1 + i01*ne00 : 0; - global float * pdst = dst + i03*ne02*ne01*ne00 + i02*ne01*ne00 + i01*ne00; + int i13 = i03%ne13; + int i12 = i02%ne12; + int i11 = i01; + + global float * psrc0 = (global float *)(src0 + i01*nb01 + i02*nb02 + i03*nb03); + global float * pmask = src1 != src0 ? 
(global float *)(src1 + i11*nb11 + i12*nb12 + i13*nb13) : 0; + global float * pdst = (global float *)(dst + i01*nb1 + i02*nb2 + i03*nb3); float slope = 1.0f; diff --git a/ggml/src/ggml-opencl/kernels/upscale.cl b/ggml/src/ggml-opencl/kernels/upscale.cl index 219d31dbb9248..25c68351baeb6 100644 --- a/ggml/src/ggml-opencl/kernels/upscale.cl +++ b/ggml/src/ggml-opencl/kernels/upscale.cl @@ -60,7 +60,8 @@ kernel void kernel_upscale_bilinear( float sf0, float sf1, float sf2, - float sf3 + float sf3, + float pixel_offset ) { global const char * src_base = (global const char *)p_src0 + off_src0; global float * dst_base = (global float *)((global char *)p_dst + off_dst); @@ -80,8 +81,6 @@ kernel void kernel_upscale_bilinear( int i02_src = (int)(i12_dst / sf2); int i03_src = (int)(i13_dst / sf3); - const float pixel_offset = 0.5f; - float y_src_f = ((float)i11_dst + pixel_offset) / sf1 - pixel_offset; long y0_src = (long)floor(y_src_f); long y1_src = y0_src + 1; diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c index e389a46dbed87..9a7d1b22d7983 100644 --- a/ggml/src/ggml-quants.c +++ b/ggml/src/ggml-quants.c @@ -568,14 +568,14 @@ static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, co } float iscale = nmax/(max - min); float scale = 1/iscale; - float best_mad = 0; + float best_error = 0; for (int i = 0; i < n; ++i) { int l = nearest_int(iscale*(x[i] - min)); L[i] = MAX(0, MIN(nmax, l)); float diff = scale * L[i] + min - x[i]; diff = use_mad ? fabsf(diff) : diff * diff; float w = weights[i]; - best_mad += w * diff; + best_error += w * diff; } if (nstep < 1) { *the_min = -min; @@ -601,18 +601,18 @@ static float make_qkx2_quants(int n, int nmax, const float * GGML_RESTRICT x, co this_min = 0; this_scale = sum_xl / sum_l2; } - float mad = 0; + float cur_error = 0; for (int i = 0; i < n; ++i) { float diff = this_scale * Laux[i] + this_min - x[i]; diff = use_mad ? 
fabsf(diff) : diff * diff; float w = weights[i]; - mad += w * diff; + cur_error += w * diff; } - if (mad < best_mad) { + if (cur_error < best_error) { for (int i = 0; i < n; ++i) { L[i] = Laux[i]; } - best_mad = mad; + best_error = cur_error; scale = this_scale; min = this_min; } diff --git a/ggml/src/ggml-sycl/backend.hpp b/ggml/src/ggml-sycl/backend.hpp index f78a36ddf8f66..f839a42bc90c9 100644 --- a/ggml/src/ggml-sycl/backend.hpp +++ b/ggml/src/ggml-sycl/backend.hpp @@ -30,6 +30,7 @@ #include "outprod.hpp" #include "quants.hpp" #include "rope.hpp" +#include "set_rows.hpp" #include "softmax.hpp" #include "tsembd.hpp" #include "wkv.hpp" diff --git a/ggml/src/ggml-sycl/binbcast.cpp b/ggml/src/ggml-sycl/binbcast.cpp index 0a3883ae1eda5..741630dba342c 100644 --- a/ggml/src/ggml-sycl/binbcast.cpp +++ b/ggml/src/ggml-sycl/binbcast.cpp @@ -225,9 +225,9 @@ struct bin_bcast_sycl { dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, block_num) * - sycl::range<3>(1, 1, block_size), + sycl_parallel_for( + stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, block_num) * sycl::range<3>(1, 1, block_size), sycl::range<3>(1, 1, block_size)), [=](sycl::nd_item<3> item_ct1) { k_bin_bcast_unravel( @@ -246,9 +246,8 @@ struct bin_bcast_sycl { dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { k_bin_bcast(src0_dd, src1_dd, dst_dd, ne0, ne1, ne2, ne3, ne10, ne11, ne12, ne13, s1, s2, s3, s01, s02, s03, s11, s12, s13, diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index 753b4af143622..4e7449d06ecfe 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -199,7 +199,7 @@ struct sycl_device_info { // 
size_t smpb; // max. shared memory per block bool vmm; // virtual memory support size_t total_vram; - sycl_hw_info hw_info; + //sycl_hw_info hw_info; \\ device id and aarch, currently not used optimize_feature opt_feature; }; @@ -286,29 +286,6 @@ struct ggml_tensor_extra_gpu { void release_extra_gpu(ggml_tensor_extra_gpu * extra, std::vector streams={}); -inline optimize_feature check_gpu_optimize_feature(syclex::architecture &arch) { - optimize_feature opt; - - opt.reorder = - (arch == syclex::architecture::intel_gpu_dg1 || - arch == syclex::architecture::intel_gpu_acm_g10 || - arch == syclex::architecture::intel_gpu_acm_g11 || - arch == syclex::architecture::intel_gpu_acm_g12 || - arch == syclex::architecture::intel_gpu_pvc || - arch == syclex::architecture::intel_gpu_pvc_vg || - arch == syclex::architecture::intel_gpu_mtl_u || - arch == syclex::architecture::intel_gpu_mtl_s || - arch == syclex::architecture::intel_gpu_mtl_h || - arch == syclex::architecture::intel_gpu_arl_u || - arch == syclex::architecture::intel_gpu_arl_s || - arch == syclex::architecture::intel_gpu_arl_h || - arch == syclex::architecture::intel_gpu_bmg_g21 || - arch == syclex::architecture::intel_gpu_lnl_m - ); - - return opt; -} - namespace sycl_ex = sycl::ext::oneapi::experimental; struct ggml_backend_sycl_context { int device; diff --git a/ggml/src/ggml-sycl/concat.cpp b/ggml/src/ggml-sycl/concat.cpp index 7aa91c861d583..3501484a14611 100644 --- a/ggml/src/ggml-sycl/concat.cpp +++ b/ggml/src/ggml-sycl/concat.cpp @@ -89,33 +89,24 @@ static void concat_f32_sycl(const float *x, const float *y, float *dst, sycl::range<3> gridDim(ne2, ne1, num_blocks); switch (dim) { case 0: - stream->parallel_for( - sycl::nd_range<3>(gridDim * - sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1); - }); - break; + sycl_parallel_for(stream, + sycl::nd_range<3>(gridDim * 
sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { concat_f32_dim0(x, y, dst, ne0, ne00, item_ct1); }); + break; case 1: - stream->parallel_for( - sycl::nd_range<3>(gridDim * - sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1); - }); - break; + sycl_parallel_for(stream, + sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { concat_f32_dim1(x, y, dst, ne0, ne01, item_ct1); }); + break; // dim >=2 will be dispatched to the default path default: - stream->parallel_for( - sycl::nd_range<3>(gridDim * - sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1); - }); - break; + sycl_parallel_for(stream, + sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CONCAT_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { concat_f32_dim2(x, y, dst, ne0, ne02, item_ct1); }); + break; } } @@ -129,33 +120,29 @@ static void concat_f32_sycl_non_cont( int64_t ne2, int64_t ne3, uint64_t nb0, uint64_t nb1, uint64_t nb2, uint64_t nb3, int32_t dim) { sycl::range<3> gridDim(ne3, ne2, ne1); - stream->parallel_for( - sycl::nd_range<3>(gridDim, sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - int64_t i3 = item_ct1.get_group(0); - int64_t i2 = item_ct1.get_group(1); - int64_t i1 = item_ct1.get_group(2); + sycl_parallel_for(stream, sycl::nd_range<3>(gridDim, sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { + int64_t i3 = item_ct1.get_group(0); + int64_t i2 = item_ct1.get_group(1); + int64_t i1 = item_ct1.get_group(2); - int64_t o[4] = {0, 0, 0, 0}; - o[dim] = dim == 0 ? ne00 : (dim == 1 ? 
ne01 : (dim == 2 ? ne02 : ne03)); + int64_t o[4] = { 0, 0, 0, 0 }; + o[dim] = dim == 0 ? ne00 : (dim == 1 ? ne01 : (dim == 2 ? ne02 : ne03)); - const float *x; + const float * x; - for (int i0 = item_ct1.get_local_id(2); i0 < ne0; - i0 += item_ct1.get_local_range(2)) { + for (int i0 = item_ct1.get_local_id(2); i0 < ne0; i0 += item_ct1.get_local_range(2)) { if (i0 < ne00 && i1 < ne01 && i2 < ne02 && i3 < ne03) { - x = (const float *)(src0 + (i3)*nb03 + (i2)*nb02 + (i1)*nb01 + - (i0)*nb00); + x = (const float *) (src0 + (i3) *nb03 + (i2) *nb02 + (i1) *nb01 + (i0) *nb00); } else { - x = (const float *)(src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 + - (i1 - o[1]) * nb11 + (i0 - o[0]) * nb10); + x = (const float *) (src1 + (i3 - o[3]) * nb13 + (i2 - o[2]) * nb12 + (i1 - o[1]) * nb11 + + (i0 - o[0]) * nb10); } float *y = (float *)(dst + i3 * nb3 + i2 * nb2 + i1 * nb1 + i0 * nb0); *y = *x; - } - }); + } + }); } void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { diff --git a/ggml/src/ggml-sycl/conv.cpp b/ggml/src/ggml-sycl/conv.cpp index 475bd34a25d56..c2f991e8d64a7 100644 --- a/ggml/src/ggml-sycl/conv.cpp +++ b/ggml/src/ggml-sycl/conv.cpp @@ -59,16 +59,10 @@ static void conv_transpose_1d_f32_f32_sycl( const int num_blocks = (output_size + SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE - 1) / SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE; const sycl::range<3> block_dims(1, 1, SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE); const sycl::range<3> block_nums(1, 1, num_blocks); - stream->parallel_for( - sycl::nd_range<3>( - block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { - conv_transpose_1d_kernel( - s0, output_size, - src0_ne0, src0_ne1, src0_ne2, - src1_ne0, dst_ne0, - src0, src1, dst, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { + conv_transpose_1d_kernel(s0, output_size, src0_ne0, src0_ne1, src0_ne2, src1_ne0, dst_ne0, src0, src1, dst, + item_ct1); + }); } void 
ggml_sycl_op_conv_transpose_1d(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { diff --git a/ggml/src/ggml-sycl/convert.cpp b/ggml/src/ggml-sycl/convert.cpp index 96d2583b13b83..0ef567122dddb 100644 --- a/ggml/src/ggml-sycl/convert.cpp +++ b/ggml/src/ggml-sycl/convert.cpp @@ -33,14 +33,11 @@ static void dequantize_block_sycl(const void *__restrict__ vx, { dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>( - sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block(vx, y, k, item_ct1); - }); + sycl_parallel_for( + stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_DEQUANTIZE_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block(vx, y, k, item_ct1); }); } } @@ -53,24 +50,18 @@ static void dequantize_row_q2_K_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 64), - sycl::range<3>(1, 1, 64)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q2_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q2_K(vx, y, item_ct1); }); } #else { dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q2_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + 
[=](sycl::nd_item<3> item_ct1) { dequantize_block_q2_K(vx, y, item_ct1); }); } #endif @@ -85,24 +76,18 @@ static void dequantize_row_q3_K_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 64), - sycl::range<3>(1, 1, 64)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q3_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q3_K(vx, y, item_ct1); }); } #else { dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q3_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q3_K(vx, y, item_ct1); }); } #endif } @@ -116,12 +101,9 @@ static void dequantize_row_q4_0_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q4_0(vx, y, nb32, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q4_0(vx, y, nb32, item_ct1); }); } } @@ -135,13 +117,12 @@ static void dequantize_row_q4_0_sycl_reorder(const void *vx, dst_t *y, const int int constexpr WARP_K = WARP_SIZE * QK4_0; const int n_warp = (k + WARP_K - 1) / WARP_K; GGML_ASSERT(k 
% 2 == 0); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, n_warp) * - sycl::range<3>(1, 1, WARP_SIZE), - sycl::range<3>(1, 1, WARP_SIZE)), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]]{ - dequantize_block_q4_0_reorder(vx, y, k, item_ct1); - }); - + sycl_parallel_for(stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, n_warp) * sycl::range<3>(1, 1, WARP_SIZE), + sycl::range<3>(1, 1, WARP_SIZE)), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_block_q4_0_reorder(vx, y, k, item_ct1); + }); } template @@ -153,12 +134,9 @@ static void dequantize_row_q4_1_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q4_1(vx, y, nb32, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q4_1(vx, y, nb32, item_ct1); }); } } @@ -171,14 +149,13 @@ static void dequantize_row_q4_K_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor scale_local_acc(sycl::range<1>(12), cgh); - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q4_K(vx, y, get_pointer(scale_local_acc), item_ct1); - }); + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { + dequantize_block_q4_K(vx, y, get_pointer(scale_local_acc), item_ct1); + }); }); 
} } @@ -191,13 +168,13 @@ static void dequantize_row_q4_K_sycl_reorder(const void * vx, dst_t * y, const i dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); - stream->submit([&](sycl::handler & cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor scale_local_acc(sycl::range<1>(12), cgh); - cgh.parallel_for(sycl::nd_range<1>(sycl::range<1>(global_size), sycl::range<1>(local_size)), - [=](sycl::nd_item<1> item_ct1) { - dequantize_block_q4_K_reorder(vx, y, get_pointer(scale_local_acc), item_ct1, nb); - }); + sycl_parallel_for<1>(cgh, sycl::nd_range<1>(sycl::range<1>(global_size), sycl::range<1>(local_size)), + [=](sycl::nd_item<1> item_ct1) { + dequantize_block_q4_K_reorder(vx, y, get_pointer(scale_local_acc), item_ct1, nb); + }); }); } @@ -210,24 +187,18 @@ static void dequantize_row_q5_K_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 64), - sycl::range<3>(1, 1, 64)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q5_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q5_K(vx, y, item_ct1); }); } #else { dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q5_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q5_K(vx, y, item_ct1); }); } #endif @@ -242,24 +213,18 @@ static void dequantize_row_q6_K_sycl(const void *vx, dst_t *y, const int64_t k, 
dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 64), - sycl::range<3>(1, 1, 64)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q6_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K(vx, y, item_ct1); }); } #else { dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_q6_K(vx, y, item_ct1); - }); + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K(vx, y, item_ct1); }); } #endif @@ -271,9 +236,9 @@ static void dequantize_row_q6_K_sycl_reorder(const void * vx, dst_t * y, const i dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), - [=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K_reorder(vx, y, item_ct1, nb); }); + sycl_parallel_for(stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_q6_K_reorder(vx, y, item_ct1, nb); }); } template @@ -284,15 +249,10 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - 
dequantize_block_iq1_s( - vx, y, item_ct1, iq1s_grid_gpu - ); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_iq1_s(vx, y, item_ct1, iq1s_grid_gpu); }); }); } } @@ -305,15 +265,10 @@ static void dequantize_row_iq1_m_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq1_m( - vx, y, item_ct1, iq1s_grid_gpu - ); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_iq1_m(vx, y, item_ct1, iq1s_grid_gpu); }); }); } } @@ -326,15 +281,12 @@ static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int64_t dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq2_xxs( - vx, y, item_ct1, iq2xxs_grid, - ksigns_iq2xs, kmask_iq2xs); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { + dequantize_block_iq2_xxs(vx, y, item_ct1, iq2xxs_grid, ksigns_iq2xs, kmask_iq2xs); + }); }); } } @@ -347,15 +299,12 @@ static void dequantize_row_iq2_xs_sycl(const void *vx, dst_t *y, const int64_t k 
dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq2_xs( - vx, y, item_ct1, iq2xs_grid, - ksigns_iq2xs, kmask_iq2xs); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { + dequantize_block_iq2_xs(vx, y, item_ct1, iq2xs_grid, ksigns_iq2xs, kmask_iq2xs); + }); }); } } @@ -368,13 +317,10 @@ static void dequantize_row_iq2_s_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq2_s(vx, y, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_iq2_s(vx, y, item_ct1); }); }); } } @@ -388,15 +334,12 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int64_t dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq3_xxs( - vx, y, item_ct1, iq3xxs_grid, - ksigns_iq2xs, kmask_iq2xs); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 
1, 32)), + [=](sycl::nd_item<3> item_ct1) { + dequantize_block_iq3_xxs(vx, y, item_ct1, iq3xxs_grid, ksigns_iq2xs, kmask_iq2xs); + }); }); } } @@ -409,14 +352,10 @@ static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int64_t k, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq3_s( - vx, y, item_ct1, kmask_iq2xs, iq3s_grid); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_iq3_s(vx, y, item_ct1, kmask_iq2xs, iq3s_grid); }); }); } } @@ -432,14 +371,11 @@ static void dequantize_row_iq4_xs_sycl(const void *vx, dst_t *y, const int64_t k dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq4_xs(vx, y, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, + sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_iq4_xs(vx, y, item_ct1); }); }); } #endif @@ -453,14 +389,11 @@ static void dequantize_row_iq4_nl_sycl(const void *vx, dst_t *y, const int64_t k dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * - sycl::range<3>(1, 1, 32), - sycl::range<3>(1, 1, 32)), - [=](sycl::nd_item<3> item_ct1) { - dequantize_block_iq4_nl(vx, y, 
item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for( + cgh, + sycl::nd_range<3>(sycl::range<3>(1, 1, nb) * sycl::range<3>(1, 1, 32), sycl::range<3>(1, 1, 32)), + [=](sycl::nd_item<3> item_ct1) { dequantize_block_iq4_nl(vx, y, item_ct1); }); }); } } diff --git a/ggml/src/ggml-sycl/cpy.cpp b/ggml/src/ggml-sycl/cpy.cpp index bec1371401955..1ffd7f1226724 100644 --- a/ggml/src/ggml-sycl/cpy.cpp +++ b/ggml/src/ggml-sycl/cpy.cpp @@ -413,7 +413,8 @@ static void ggml_cpy_f16_f32_sycl(const char * cx, char * cdst, const int ne, co { dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); - stream->parallel_for( + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { @@ -431,7 +432,8 @@ static void ggml_cpy_f32_f32_sycl(const char * cx, char * cdst, const int ne, co { dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); - stream->parallel_for( + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { @@ -449,7 +451,8 @@ static void ggml_cpy_f32_f16_sycl(const char * cx, char * cdst, const int ne, co { dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); - stream->parallel_for( + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { @@ -465,11 +468,11 @@ static void ggml_cpy_f32_q8_0_sycl(const char * cx, char * cdst, const int ne, c const int nb12, const int nb13, queue_ptr stream) { GGML_ASSERT(ne % QK8_0 == 0); const int num_blocks = ne / QK8_0; - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 
1)), - [=](sycl::nd_item<3> item_ct1) { - cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, - ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, + ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } static void ggml_cpy_q8_0_f32_sycl(const char * cx, char * cdst, const int ne, const int ne00, const int ne01, @@ -477,11 +480,11 @@ static void ggml_cpy_q8_0_f32_sycl(const char * cx, char * cdst, const int ne, c const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ne; - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - cpy_q_f32(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, - ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + cpy_q_f32(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, + ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } static void ggml_cpy_f32_q4_0_sycl(const char * cx, char * cdst, const int ne, const int ne00, const int ne01, @@ -490,11 +493,11 @@ static void ggml_cpy_f32_q4_0_sycl(const char * cx, char * cdst, const int ne, c const int nb12, const int nb13, queue_ptr stream) { GGML_ASSERT(ne % QK4_0 == 0); const int num_blocks = ne / QK4_0; - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, - ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, 
sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, + ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } static void ggml_cpy_q4_0_f32_sycl(const char * cx, char * cdst, const int ne, const int ne00, const int ne01, @@ -502,8 +505,9 @@ static void ggml_cpy_q4_0_f32_sycl(const char * cx, char * cdst, const int ne, c const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ne; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { cpy_q_f32, QK4_0>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); @@ -516,11 +520,11 @@ static void ggml_cpy_f32_q4_1_sycl(const char * cx, char * cdst, const int ne, c const int nb12, const int nb13, queue_ptr stream) { GGML_ASSERT(ne % QK4_1 == 0); const int num_blocks = ne / QK4_1; - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, - ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, + ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } static void ggml_cpy_q4_1_f32_sycl(const char * cx, char * cdst, const int ne, const int ne00, const int ne01, @@ -528,8 +532,9 @@ static void ggml_cpy_q4_1_f32_sycl(const char * cx, char * cdst, const int ne, c const 
int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ne; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { cpy_q_f32, QK4_1>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); @@ -542,11 +547,11 @@ static void ggml_cpy_f32_q5_0_sycl(const char * cx, char * cdst, const int ne, c const int nb12, const int nb13, queue_ptr stream) { GGML_ASSERT(ne % QK5_0 == 0); const int num_blocks = ne / QK5_0; - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, - ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, + ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } static void ggml_cpy_q5_0_f32_sycl(const char * cx, char * cdst, const int ne, const int ne00, const int ne01, @@ -554,8 +559,9 @@ static void ggml_cpy_q5_0_f32_sycl(const char * cx, char * cdst, const int ne, c const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ne; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { cpy_q_f32, QK5_0>(cx, cdst, 
ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); @@ -568,11 +574,11 @@ static void ggml_cpy_f32_q5_1_sycl(const char * cx, char * cdst, const int ne, c const int nb12, const int nb13, queue_ptr stream) { GGML_ASSERT(ne % QK5_1 == 0); const int num_blocks = ne / QK5_1; - stream->parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), - [=](sycl::nd_item<3> item_ct1) { - cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, - ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, + ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } static void ggml_cpy_q5_1_f32_sycl(const char * cx, char * cdst, const int ne, const int ne00, const int ne01, @@ -580,8 +586,9 @@ static void ggml_cpy_q5_1_f32_sycl(const char * cx, char * cdst, const int ne, c const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ne; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { cpy_q_f32, QK5_1>(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); @@ -594,11 +601,11 @@ static void ggml_cpy_f32_iq4_nl_sycl(const char * cx, char * cdst, const int ne, const int nb12, const int nb13, queue_ptr stream) { GGML_ASSERT(ne % QK4_NL == 0); const int num_blocks = ne / QK4_NL; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), [=](sycl::nd_item<3> item_ct1) { - 
cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, - ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks), sycl::range<3>(1, 1, 1)), + [=](sycl::nd_item<3> item_ct1) { + cpy_f32_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, + ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } static void ggml_cpy_f16_f16_sycl(const char * cx, char * cdst, const int ne, const int ne00, const int ne01, @@ -609,7 +616,8 @@ static void ggml_cpy_f16_f16_sycl(const char * cx, char * cdst, const int ne, co { dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); - stream->parallel_for( + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { @@ -628,7 +636,8 @@ static void ggml_cpy_i16_i16_sycl(const char * cx, char * cdst, const int ne, co // dpct::has_capability_or_fail(stream->get_device(), // {sycl::aspect::fp16}); - stream->parallel_for( + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { @@ -647,7 +656,8 @@ static void ggml_cpy_i32_i32_sycl(const char * cx, char * cdst, const int ne, co // dpct::has_capability_or_fail(stream->get_device(), // {sycl::aspect::fp16}); - stream->parallel_for( + sycl_parallel_for( + stream, sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { @@ -662,11 +672,13 @@ static void ggml_cpy_q8_0_q8_0(const char * cx, char * cdst, const int ne, const const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = 
ceil_div(ne, SYCL_CPY_BLOCK_SIZE); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { + cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, + ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } @@ -675,11 +687,13 @@ static void ggml_cpy_q5_0_q5_0(const char * cx, char * cdst, const int ne, const const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ceil_div(ne, SYCL_CPY_BLOCK_SIZE); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { + cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, + ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } @@ -689,11 +703,13 @@ static void ggml_cpy_q5_1_q5_1(const char * cx, char * cdst, const int ne, const const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ceil_div(ne, SYCL_CPY_BLOCK_SIZE); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), - 
sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { + cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, + ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } @@ -702,10 +718,13 @@ static void ggml_cpy_q4_0_q4_0(const char * cx, char * cdst, const int ne, const const int ne10, const int ne11, const int ne12, const int nb10, const int nb11, const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ceil_div(ne, SYCL_CPY_BLOCK_SIZE); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { + cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, + ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } @@ -715,10 +734,13 @@ static void ggml_cpy_q4_1_q4_1(const char * cx, char * cdst, const int ne, const const int nb12, const int nb13, queue_ptr stream) { const int num_blocks = ceil_div(ne, SYCL_CPY_BLOCK_SIZE); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, ne12, nb10, nb11, 
nb12, nb13, item_ct1); - }); + sycl_parallel_for(stream, + sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_CPY_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { + cpy_q_q(cx, cdst, ne, ne00, ne01, ne02, nb00, nb01, nb02, nb03, ne10, ne11, + ne12, nb10, nb11, nb12, nb13, item_ct1); + }); } void ggml_sycl_cpy(ggml_backend_sycl_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1) try { diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml/src/ggml-sycl/dmmv.cpp index 4f2760110c212..70579c0c3be11 100644 --- a/ggml/src/ggml-sycl/dmmv.cpp +++ b/ggml/src/ggml-sycl/dmmv.cpp @@ -208,12 +208,10 @@ static void convert_mul_mat_vec_f16_sycl(const void *vx, const dfloat *y, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - dequantize_mul_mat_vec<1, 1, convert_f16>(vx, y, dst, ncols, - nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_mul_mat_vec<1, 1, convert_f16>(vx, y, dst, ncols, nrows, item_ct1); + }); } } @@ -877,12 +875,11 @@ static void dequantize_mul_mat_vec_q4_0_sycl_reorder(const void *vx, const dfloa dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - dequantize_mul_mat_vec_reorder( - vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_mul_mat_vec_reorder(vx, y, dst, ncols, + nrows, item_ct1); + }); } } @@ -900,12 +897,10 
@@ static void dequantize_mul_mat_vec_q4_0_sycl(const void *vx, const dfloat *y, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - dequantize_mul_mat_vec( - vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_mul_mat_vec(vx, y, dst, ncols, nrows, item_ct1); + }); } } @@ -921,12 +916,10 @@ static void dequantize_mul_mat_vec_q4_1_sycl(const void *vx, const dfloat *y, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - dequantize_mul_mat_vec( - vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_mul_mat_vec(vx, y, dst, ncols, nrows, item_ct1); + }); } } @@ -942,12 +935,10 @@ static void dequantize_mul_mat_vec_q5_0_sycl(const void *vx, const dfloat *y, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - dequantize_mul_mat_vec( - vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_mul_mat_vec(vx, y, dst, ncols, nrows, item_ct1); + }); } } @@ -963,12 +954,10 @@ static void dequantize_mul_mat_vec_q5_1_sycl(const void *vx, const dfloat *y, 
dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - dequantize_mul_mat_vec( - vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_mul_mat_vec(vx, y, dst, ncols, nrows, item_ct1); + }); } } @@ -984,12 +973,10 @@ static void dequantize_mul_mat_vec_q8_0_sycl(const void *vx, const dfloat *y, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - dequantize_mul_mat_vec( - vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + dequantize_mul_mat_vec(vx, y, dst, ncols, nrows, item_ct1); + }); } } @@ -1002,11 +989,10 @@ static void dequantize_mul_mat_vec_q2_K_sycl(const void *vx, const float *y, const int block_num_y = (nrows + ny - 1) / ny; const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, ny, QK_WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { - dequantize_mul_mat_vec_q2_k(vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { + dequantize_mul_mat_vec_q2_k(vx, y, dst, ncols, nrows, item_ct1); + }); } static void dequantize_mul_mat_vec_q3_K_sycl(const void *vx, const float *y, @@ -1018,11 +1004,10 @@ 
static void dequantize_mul_mat_vec_q3_K_sycl(const void *vx, const float *y, const int block_num_y = (nrows + ny - 1) / ny; const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, ny, QK_WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { - dequantize_mul_mat_vec_q3_k(vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { + dequantize_mul_mat_vec_q3_k(vx, y, dst, ncols, nrows, item_ct1); + }); } static void dequantize_mul_mat_vec_q4_K_sycl(const void *vx, const float *y, @@ -1034,11 +1019,10 @@ static void dequantize_mul_mat_vec_q4_K_sycl(const void *vx, const float *y, const int block_num_y = (nrows + ny - 1) / ny; const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, ny, QK_WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { - dequantize_mul_mat_vec_q4_k(vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { + dequantize_mul_mat_vec_q4_k(vx, y, dst, ncols, nrows, item_ct1); + }); } static void dequantize_mul_mat_vec_q5_K_sycl(const void *vx, const float *y, @@ -1047,11 +1031,10 @@ static void dequantize_mul_mat_vec_q5_K_sycl(const void *vx, const float *y, dpct::queue_ptr stream) { GGML_ASSERT(ncols % QK_K == 0); const sycl::range<3> block_dims(1, 1, QK_WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { - 
dequantize_mul_mat_vec_q5_k(vx, y, dst, ncols, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { + dequantize_mul_mat_vec_q5_k(vx, y, dst, ncols, item_ct1); + }); } static void dequantize_mul_mat_vec_q6_K_sycl(const void *vx, const float *y, @@ -1063,11 +1046,10 @@ static void dequantize_mul_mat_vec_q6_K_sycl(const void *vx, const float *y, const int block_num_y = (nrows + ny - 1) / ny; const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, ny, QK_WARP_SIZE); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { - dequantize_mul_mat_vec_q6_k(vx, y, dst, ncols, nrows, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(QK_WARP_SIZE)]] { + dequantize_mul_mat_vec_q6_k(vx, y, dst, ncols, nrows, item_ct1); + }); } void ggml_sycl_op_dequantize_mul_mat_vec( diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml/src/ggml-sycl/dpct/helper.hpp index d538965b096bf..27c7278607832 100644 --- a/ggml/src/ggml-sycl/dpct/helper.hpp +++ b/ggml/src/ggml-sycl/dpct/helper.hpp @@ -13,10 +13,10 @@ #ifndef GGML_SYCL_DPCT_HELPER_HPP #define GGML_SYCL_DPCT_HELPER_HPP +#include #include #include #include -#include #ifdef GGML_SYCL_USE_INTEL_ONEMKL #include @@ -118,6 +118,36 @@ inline auto get_onemath_backend(sycl::queue& queue) #endif } +#ifdef SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS + namespace syclex = sycl::ext::oneapi::experimental; +#endif + +template +__dpct_inline__ void sycl_parallel_for(sycl::handler & cgh, sycl::nd_range nd_range, Func && func) { +#ifdef SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS + syclex::nd_launch(cgh, nd_range, func); +#else + cgh.parallel_for(nd_range, func); +#endif +} + +template 
+__dpct_inline__ void sycl_parallel_for(sycl::queue * q, sycl::nd_range nd_range, Func && func) { +#ifdef SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS + syclex::nd_launch(*q, nd_range, func); +#else + q->parallel_for(nd_range, func); +#endif +} + +template __dpct_inline__ void sycl_launch(sycl::queue * stream, Func && func) { +#ifdef SYCL_EXT_ONEAPI_ENQUEUE_FUNCTIONS + syclex::submit(*stream, func); +#else + stream->submit(func); +#endif +} + namespace dpct { typedef sycl::queue *queue_ptr; diff --git a/ggml/src/ggml-sycl/element_wise.cpp b/ggml/src/ggml-sycl/element_wise.cpp index 5b7c4f0b4f003..0363b06a3ec9b 100644 --- a/ggml/src/ggml-sycl/element_wise.cpp +++ b/ggml/src/ggml-sycl/element_wise.cpp @@ -1,12 +1,19 @@ #include "common.hpp" +#include "ggml-sycl/presets.hpp" #include "ggml.h" #include "element_wise.hpp" +#define SYCL_GLOBAL_ID_LOOP(K, ITEM) \ + for (auto i = ITEM.get_global_id(0); i < (size_t)K; i += ITEM.get_global_range(0)) + +#define SYCL_LOCAL_ID_CALC(ITEM, IDX) \ + (ITEM.get_local_range(IDX) * ITEM.get_group(IDX) + ITEM.get_local_id(IDX)) + + static void acc_f32(const float * x, const float * y, float * dst, const int ne, const int ne10, const int ne11, const int ne12, - const int nb1, const int nb2, int offset, const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); + const int nb1, const int nb2, int offset, const sycl::nd_item<1> &item_ct1) { + const int i = SYCL_LOCAL_ID_CALC(item_ct1, 0); if (i >= ne) { return; } @@ -21,248 +28,280 @@ static void acc_f32(const float * x, const float * y, float * dst, const int ne, } } +/* Unary OP funcs */ template -static void sgn(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) { - for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) { - dst[i] = x[i] > static_cast(0.f) ? static_cast(1.f) : ((x[i] < static_cast(0.f) ? 
static_cast(-1.f) : static_cast(0.f))); - } +static __dpct_inline__ T op_sgn(T x) { + return x > static_cast(0.f) ? static_cast(1.f) : ((x < static_cast(0.f) ? static_cast(-1.f) : static_cast(0.f))); } template -static void abs_op(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) { - for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) { - dst[i] = sycl::fabs(x[i]); - } +static __dpct_inline__ T op_abs(T x) { + return sycl::fabs(x); } template -static void elu_op(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) { - for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) { - dst[i] = (x[i] > static_cast(0.f)) ? x[i] : sycl::expm1(x[i]); - } +static __dpct_inline__ T op_elu(T x) { + return (x > static_cast(0.f)) ? x : sycl::expm1(x); } template -static void gelu(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { +static __dpct_inline__ T op_gelu(T x) { const T GELU_COEF_A = static_cast(0.044715f); const T SQRT_2_OVER_PI = static_cast(0.79788456080286535587989211986876f); - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); + return static_cast(0.5f) * x * + (static_cast(1.0f) + + sycl::tanh(SQRT_2_OVER_PI * x * (static_cast(1.0f) + GELU_COEF_A * x * x))); +} - if (i >= k) { - return; - } +template +static __dpct_inline__ T op_silu(T x) { + return x / (static_cast(1.0f) + sycl::native::exp(-x)); +} - float xi = x[i]; - dst[i] = static_cast(0.5f) * xi * - (static_cast(1.0f) + - sycl::tanh(SQRT_2_OVER_PI * xi * (static_cast(1.0f) + GELU_COEF_A * xi * xi))); +template +static __dpct_inline__ T op_gelu_quick(T x) { + const T GELU_QUICK_COEF_LOCAL = static_cast(-1.702f); + return x * (static_cast(1.0f) / (static_cast(1.0f) + sycl::native::exp(GELU_QUICK_COEF_LOCAL * x))); } template -static void silu(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = 
item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static __dpct_inline__ T op_gelu_erf(T x) { + const T SQRT_2_INV = static_cast(0.70710678118654752440084436210484f); + return static_cast(0.5f) * x * (static_cast(1.0f) + sycl::erf(x * SQRT_2_INV)); +} - if (i >= k) { - return; - } - dst[i] = x[i] / (static_cast(1.0f) + sycl::native::exp(-x[i])); +template +static __dpct_inline__ T op_tanh(T x) { + return sycl::tanh(x); } template -static void gelu_quick(const T *x, T *dst, int k, - const sycl::nd_item<3> &item_ct1) { - const float GELU_QUICK_COEF = -1.702f; - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); - if (i >= k) { - return; - } - dst[i] = x[i] * (static_cast(1.0f) / (static_cast(1.0f) + sycl::native::exp(GELU_QUICK_COEF * x[i]))); +static __dpct_inline__ T op_relu(T x) { + return sycl::fmax(x, static_cast(0)); } template -static void gelu_erf(const T * x, T * dst, const int k, const sycl::nd_item<3> &item_ct1) { - const T SQRT_2_INV = static_cast(0.70710678118654752440084436210484f); - for(auto i = item_ct1.get_global_id(2); i < (const size_t)k; i += item_ct1.get_global_range(2)) { - auto x_i = x[i]; - dst[i] = static_cast(0.5f) * x_i * (static_cast(1.0f) + sycl::erf(x_i * SQRT_2_INV)); - } +static __dpct_inline__ T op_sigmoid(T x) { + return static_cast(1.0f) / (static_cast(1.0f) + sycl::native::exp(-x)); } template -static void tanh(const T *x, T *dst, int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); - if (i >= k) { - return; - } - dst[i] = sycl::tanh((x[i])); +static __dpct_inline__ T op_sqrt(T x) { + return sycl::sqrt(x); +} + +template +static __dpct_inline__ T op_sin(T x) { + return sycl::sin(x); } template -static void relu(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - 
item_ct1.get_local_id(2); +static __dpct_inline__ T op_cos(T x) { + return sycl::cos(x); +} - if (i >= k) { - return; - } - dst[i] = sycl::fmax((x[i]), static_cast(0)); +template +static __dpct_inline__ T op_hardsigmoid(T x) { + return sycl::fmin(static_cast(1.0f), sycl::fmax(static_cast(0.0f), (x + static_cast(3.0f)) / static_cast(6.0f))); } template -static void sigmoid(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static __dpct_inline__ T op_hardswish(T x) { + return x * sycl::fmin(static_cast(1.0f), sycl::fmax(static_cast(0.0f), (x + static_cast(3.0f)) / static_cast(6.0f))); +} - if (i >= k) { - return; +template +static __dpct_inline__ T op_exp(T x) { + return sycl::exp(x); +} + +template +static __dpct_inline__ T op_log(T x) { + if (x <= static_cast(0)) { + return neg_infinity(); } - dst[i] = 1.0f / (static_cast(1.0f) + sycl::native::exp(-x[i])); + return sycl::log(x); } template -static void sqrt(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static __dpct_inline__ T op_neg(T x) { + return -x; +} - if (i >= k) { - return; +template +static __dpct_inline__ T op_step(T x) { + return (x > static_cast(0.0f)) ? static_cast(1.0f) : static_cast(0.0f); +} + +template +static __dpct_inline__ T op_leaky_relu(T x, float negative_slope) { + T neg_slope_T = static_cast(negative_slope); + return sycl::fmax(x, static_cast(0)) + + sycl::fmin(x, static_cast(0.0f)) * neg_slope_T; +} + +template +static __dpct_inline__ T op_sqr(T x) { + return x * x; +} + +template +static __dpct_inline__ T op_clamp(T x, float min_val, float max_val) { + return x < static_cast(min_val) ? static_cast(min_val) : (x > static_cast(max_val) ? 
static_cast(max_val) : x); +} + +template +static void unary_op_sgn_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_sgn(x[i]); } - dst[i] = sycl::sqrt(x[i]); } template -static void sin(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_abs_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_abs(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_elu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_elu(x[i]); } - dst[i] = sycl::sin(x[i]); } template -static void cos(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_gelu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_gelu(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_silu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_silu(x[i]); } - dst[i] = sycl::cos(x[i]); } template -static void hardsigmoid(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_gelu_quick_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_gelu_quick(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_gelu_erf_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + 
SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_gelu_erf(x[i]); + } +} + +template +static void unary_op_tanh_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_tanh(x[i]); } - dst[i] = sycl::fmin(static_cast(1.0f), sycl::fmax(static_cast(0.0f), (x[i] + static_cast(3.0f)) / static_cast(6.0f))); } template -static void hardswish(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_relu_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_relu(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_sigmoid_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_sigmoid(x[i]); } - dst[i] = x[i] * sycl::fmin(static_cast(1.0f), sycl::fmax(static_cast(0.0f), (x[i] + static_cast(3.0f)) / static_cast(6.0f))); } template -static void exp(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_sqrt_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_sqrt(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_sin_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_sin(x[i]); } - dst[i] = sycl::exp(x[i]); } template -static void log(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_cos_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> 
&item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_cos(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_hardsigmoid_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_hardsigmoid(x[i]); } - T xi = x[i]; - if (xi <= 0) { - dst[i] = neg_infinity(); - } else { - dst[i] = sycl::log(xi); +} + +template +static void unary_op_hardswish_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_hardswish(x[i]); } } template -static void neg(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_exp_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_exp(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_log_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_log(x[i]); } - dst[i] = -x[i]; } template -static void step(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_neg_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_neg(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_step_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_step(x[i]); } - dst[i] = x[i] > static_cast(0.0f); } template -static void leaky_relu(const T *x, T *dst, const int k, const float negative_slope, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * 
item_ct1.get_group(2) + - item_ct1.get_local_id(2); - if (i >= k) { - return; +static void unary_op_leaky_relu_kernel(const T * x, T * dst, const int k, float negative_slope, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_leaky_relu(x[i], negative_slope); } - dst[i] = sycl::fmax((x[i]), static_cast(0)) + - sycl::fmin((x[i]), static_cast(0.0f)) * negative_slope; } template -static void sqr(const T * x, T * dst, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); +static void unary_op_sqr_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_sqr(x[i]); + } +} - if (i >= k) { - return; +template +static void unary_op_clamp_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1, float min_val, float max_val) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = op_clamp(x[i], min_val, max_val); } - dst[i] = x[i] * x[i]; } template @@ -281,10 +320,10 @@ static void upscale(const T *x, T *dst, const int nb00, const int nb01, int i12 = (index / (ne10 * ne11)) % ne12; int i13 = (index / (ne10 * ne11 * ne12)) % ne13; - int i00 = i10 / sf0; - int i01 = i11 / sf1; - int i02 = i12 / sf2; - int i03 = i13 / sf3; + int i00 = static_cast(i10 / sf0); + int i01 = static_cast(i11 / sf1); + int i02 = static_cast(i12 / sf2); + int i03 = static_cast(i13 / sf3); dst[index] = *(const T *)((const char *)x + i03 * nb03 + i02 * nb02 + i01 * nb01 + i00 * nb00); } @@ -292,8 +331,7 @@ static void upscale(const T *x, T *dst, const int nb00, const int nb01, template static void pad(const T *x, T *dst, const int ne0, const int ne00, const int ne01, const int ne02, const sycl::nd_item<3> &item_ct1) { - int nidx = item_ct1.get_local_id(2) + - item_ct1.get_group(2) * item_ct1.get_local_range(2); + int nidx = SYCL_LOCAL_ID_CALC(item_ct1, 2); if (nidx >= ne0) { return; } @@ 
-310,299 +348,72 @@ static void pad(const T *x, T *dst, const int ne0, const int ne00, const int ne } } - template static void clamp(const T * x, T * dst, const float min, const float max, const int k, - const sycl::nd_item<3> &item_ct1) { - const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + - item_ct1.get_local_id(2); - - if (i >= k) { - return; + const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + dst[i] = x[i] < static_cast(min) ? static_cast(min) : (x[i] > static_cast(max) ? static_cast(max) : x[i]); } - - dst[i] = x[i] < static_cast(min) ? static_cast(min) : (x[i] > static_cast(max) ? static_cast(max) : x[i]); -} - -static void acc_f32_sycl(const float *x, const float *y, float *dst, - const int n_elements, const int ne10, const int ne11, - const int ne12, const int nb1, const int nb2, - const int offset, queue_ptr stream) { - int num_blocks = (n_elements + SYCL_ACC_BLOCK_SIZE - 1) / SYCL_ACC_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_ACC_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - acc_f32(x, y, dst, n_elements, ne10, ne11, ne12, nb1, nb2, offset, - item_ct1); - }); -} - -template -static void gelu_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_GELU_BLOCK_SIZE - 1) / SYCL_GELU_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - gelu(x, dst, k, item_ct1); - }); -} - -template -static void silu_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_SILU_BLOCK_SIZE - 1) / SYCL_SILU_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_SILU_BLOCK_SIZE), - sycl::range<3>(1, 1, 
SYCL_SILU_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - silu(x, dst, k, item_ct1); - }); -} - -template -static void sgn_sycl(const T * x, T * dst, const int k, queue_ptr stream) { - // hard code for now - const int num_blocks = ceil_div(k, 256); - stream->parallel_for( - sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range(1, 1, 256)), sycl::range(1, 1, 256)), [=](sycl::nd_item<3> item_ct1) { - sgn(x, dst, k, item_ct1); - }); -} - -template -static void abs_sycl(const T * x, T * dst, const int k, queue_ptr stream) { - // hard code for now - const int num_blocks = ceil_div(k, 256); - stream->parallel_for( - sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, 256)), sycl::range<3>(1, 1, 256)), [=](sycl::nd_item<3> item_ct1) { - abs_op(x, dst, k, item_ct1); - }); -} - - -template -static void elu_sycl(const T * x, T * dst, const int k, queue_ptr stream) { - // hard code for now - const int num_blocks = ceil_div(k, 256); - stream->parallel_for( - sycl::nd_range<3>((sycl::range<3>(1, 1, num_blocks) * sycl::range<3>(1, 1, 256)), sycl::range<3>(1, 1, 256)), [=](sycl::nd_item<3> item_ct1) { - elu_op(x, dst, k, item_ct1); - }); -} - -template -static void gelu_quick_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_GELU_BLOCK_SIZE - 1) / SYCL_GELU_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - gelu_quick(x, dst, k, item_ct1); - }); -} - - -template -static void gelu_erf_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE); - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_GELU_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - gelu_erf(x, dst, k, 
item_ct1); - }); -} - -template -static void tanh_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_TANH_BLOCK_SIZE - 1) / SYCL_TANH_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_TANH_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_TANH_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - tanh(x, dst, k, item_ct1); - }); -} - -template -static void relu_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_RELU_BLOCK_SIZE - 1) / SYCL_RELU_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - relu(x, dst, k, item_ct1); - }); -} - -template -static void hardsigmoid_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_HARDSIGMOID_BLOCK_SIZE - 1) / SYCL_HARDSIGMOID_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_HARDSIGMOID_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_HARDSIGMOID_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - hardsigmoid(x, dst, k, item_ct1); - }); -} - -template -static void hardswish_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_HARDSWISH_BLOCK_SIZE - 1) / SYCL_HARDSWISH_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_HARDSWISH_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_HARDSWISH_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - hardswish(x, dst, k, item_ct1); - }); -} - -template -static void exp_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_EXP_BLOCK_SIZE - 1) / SYCL_EXP_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - 
sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - exp(x, dst, k, item_ct1); - }); } template -static void log_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_EXP_BLOCK_SIZE - 1) / SYCL_EXP_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_EXP_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - log(x, dst, k, item_ct1); - }); -} - -template -static void neg_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_NEG_BLOCK_SIZE - 1) / SYCL_NEG_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - neg(x, dst, k, item_ct1); - }); -} - -template -static void step_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_NEG_BLOCK_SIZE - 1) / SYCL_NEG_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_NEG_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - step(x, dst, k, item_ct1); - }); -} - -template -static void sigmoid_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_SIGMOID_BLOCK_SIZE - 1) / SYCL_SIGMOID_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_SIGMOID_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_SIGMOID_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - sigmoid(x, dst, k, item_ct1); - }); +static void gated_op_fused_geglu(const T * x, const T * g, T * dst, const uint64_t k, const uint64_t n, const uint64_t o0, const uint64_t o1, const sycl::nd_item<1> &item_ct1) { + 
SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + const int64_t j0 = (i / n) * o0 + (i % n); + const int64_t j1 = o0 == o1 ? j0 : (i / n) * o1 + (i % n); + dst[i] = op_gelu(x[j0]) * g[j1]; + } } template -static void sqrt_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_SQRT_BLOCK_SIZE - 1) / SYCL_SQRT_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_SQRT_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_SQRT_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - sqrt(x, dst, k, item_ct1); - }); +static void gated_op_fused_reglu(const T * x, const T * g, T * dst, const uint64_t k, const uint64_t n, const uint64_t o0, const uint64_t o1, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + const int64_t j0 = (i / n) * o0 + (i % n); + const int64_t j1 = o0 == o1 ? j0 : (i / n) * o1 + (i % n); + dst[i] = op_relu(x[j0]) * g[j1]; + } } template -static void sin_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_SIN_BLOCK_SIZE - 1) / SYCL_SIN_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - sin(x, dst, k, item_ct1); - }); +static void gated_op_fused_swiglu(const T * x, const T * g, T * dst, const uint64_t k, const uint64_t n, const uint64_t o0, const uint64_t o1, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + const int64_t j0 = (i / n) * o0 + (i % n); + const int64_t j1 = o0 == o1 ? 
j0 : (i / n) * o1 + (i % n); + dst[i] = op_silu(x[j0]) * g[j1]; + } } template -static void cos_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_SIN_BLOCK_SIZE - 1) / SYCL_SIN_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_SIN_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - cos(x, dst, k, item_ct1); - }); +static void gated_op_fused_geglu_erf(const T * x, const T * g, T * dst, const uint64_t k, const uint64_t n, const uint64_t o0, const uint64_t o1, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + const int64_t j0 = (i / n) * o0 + (i % n); + const int64_t j1 = o0 == o1 ? j0 : (i / n) * o1 + (i % n); + dst[i] = op_gelu_erf(x[j0]) * g[j1]; + } } template -static void leaky_relu_sycl(const T *x, T *dst, const int k, - const float negative_slope, - queue_ptr stream) { - const int num_blocks = (k + SYCL_RELU_BLOCK_SIZE - 1) / SYCL_RELU_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_RELU_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - leaky_relu(x, dst, k, negative_slope, item_ct1); - }); +static void gated_op_fused_geglu_quick(const T * x, const T * g, T * dst, const uint64_t k, const uint64_t n, const uint64_t o0, const uint64_t o1, const sycl::nd_item<1> &item_ct1) { + SYCL_GLOBAL_ID_LOOP(k, item_ct1) { + const int64_t j0 = (i / n) * o0 + (i % n); + const int64_t j1 = o0 == o1 ? 
j0 : (i / n) * o1 + (i % n); + dst[i] = op_gelu_quick(x[j0]) * g[j1]; + } } -template -static void sqr_sycl(const T *x, T *dst, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_SQR_BLOCK_SIZE - 1) / SYCL_SQR_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_SQR_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_SQR_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - sqr(x, dst, k, item_ct1); +namespace ggml_sycl_detail { +static void acc_f32_sycl(const float *x, const float *y, float *dst, + const int n_elements, const int ne10, const int ne11, + const int ne12, const int nb1, const int nb2, + const int offset, queue_ptr stream) { + int num_blocks = ceil_div(n_elements, SYCL_ACC_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * + sycl::range<1>(SYCL_ACC_BLOCK_SIZE), + sycl::range<1>(SYCL_ACC_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + acc_f32(x, y, dst, n_elements, ne10, ne11, ne12, nb1, nb2, offset, + item_ct1); }); } @@ -612,11 +423,10 @@ static void upscale_sycl(const T *x, T *dst, const int nb00, const int nb01, const int ne12, const int ne13, const float sf0, const float sf1, const float sf2, const float sf3, queue_ptr stream) { int dst_size = ne10 * ne11 * ne12 * ne13; - int num_blocks = (dst_size + SYCL_UPSCALE_BLOCK_SIZE - 1) / SYCL_UPSCALE_BLOCK_SIZE; + int num_blocks = ceil_div(dst_size, SYCL_UPSCALE_BLOCK_SIZE); sycl::range<1> gridDim(num_blocks * SYCL_UPSCALE_BLOCK_SIZE); - stream->parallel_for( - sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), - [=](sycl::nd_item<1> item_ct1) { + sycl_parallel_for<1>( + stream, sycl::nd_range<1>(gridDim, sycl::range<1>(SYCL_UPSCALE_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) { upscale(x, dst, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, item_ct1); }); } @@ -625,35 +435,19 @@ template static void pad_sycl(const T *x, T *dst, const int ne00, const int 
ne01, const int ne02, const int ne0, const int ne1, const int ne2, queue_ptr stream) { - int num_blocks = (ne0 + SYCL_PAD_BLOCK_SIZE - 1) / SYCL_PAD_BLOCK_SIZE; + int num_blocks = ceil_div(ne0, SYCL_PAD_BLOCK_SIZE); sycl::range<3> gridDim(ne2, ne1, num_blocks); - stream->parallel_for( - sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - pad(x, dst, ne0, ne00, ne01, ne02, item_ct1); - }); + sycl_parallel_for(stream, + sycl::nd_range<3>(gridDim * sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE), + sycl::range<3>(1, 1, SYCL_PAD_BLOCK_SIZE)), + [=](sycl::nd_item<3> item_ct1) { pad(x, dst, ne0, ne00, ne01, ne02, item_ct1); }); } -template -static void clamp_sycl(const T *x, T *dst, const float min, - const float max, const int k, - queue_ptr stream) { - const int num_blocks = (k + SYCL_CLAMP_BLOCK_SIZE - 1) / SYCL_CLAMP_BLOCK_SIZE; - stream->parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_blocks) * - sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE), - sycl::range<3>(1, 1, SYCL_CLAMP_BLOCK_SIZE)), - [=](sycl::nd_item<3> item_ct1) { - clamp(x, dst, min, max, k, item_ct1); - }); -} - -inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +template +static inline void dispatch_ggml_sycl_op_unary(ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... 
args) { #if defined (GGML_SYCL_F16) GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); - #else GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); @@ -666,14 +460,14 @@ inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) case GGML_TYPE_F16: { auto data_pts = cast_data(dst); - sgn_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + kernel_invoker(data_pts.src, data_pts.dst, (int)ggml_nelements(dst->src[0]), main_stream, std::forward(args)...); break; } #endif case GGML_TYPE_F32: { auto data_pts = cast_data(dst); - sgn_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + kernel_invoker(data_pts.src, data_pts.dst, (int)ggml_nelements(dst->src[0]), main_stream, std::forward(args)...); break; } default: @@ -681,11 +475,11 @@ inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) } } -inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +template +static inline void dispatch_ggml_sycl_op_fused_glu(ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... args) { #if defined (GGML_SYCL_F16) GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); - #else GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); @@ -693,19 +487,66 @@ inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) GGML_ASSERT(dst->src[0]->type == dst->type); dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + const int64_t nc = src1 ? 
src0->ne[0] : src0->ne[0] / 2;; + GGML_ASSERT(dst->ne[0] == nc); + GGML_ASSERT(ggml_is_contiguous_1(dst->src[0])); + GGML_ASSERT(ggml_is_contiguous(dst)); + const int32_t swapped = ((const int32_t *) dst->op_params)[1]; + void * src0_d = src0->data; + void * src1_d = src1 ? src1->data : src0->data; + const int64_t src0_o = src0->nb[1]; + const int64_t src1_o = src1 ? src1->nb[1] : src0->nb[1]; + void * dst_d = dst->data; + if (src1) { + GGML_ASSERT(ggml_is_contiguous_1(src1)); + GGML_ASSERT(src1->nb[0] == ggml_element_size(src1)); + GGML_ASSERT(src1->ne[0] == nc); + GGML_ASSERT(src0->type == src1->type); + } switch (dst->type) { #if defined (GGML_SYCL_F16) case GGML_TYPE_F16: { - auto data_pts = cast_data(dst); - abs_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + sycl::half * src0_p = (sycl::half *) src0_d; + sycl::half * src1_p = (sycl::half *) src1_d; + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 0 : nc; + } + kernel_invoker(src0_p, + src1_p, + (sycl::half *) dst_d, + ggml_nelements(dst), + nc, + src0_o / sizeof(sycl::half), + src1_o / sizeof(sycl::half), + main_stream, + std::forward(args)...); break; } #endif case GGML_TYPE_F32: { - auto data_pts = cast_data(dst); - abs_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + float * src0_p = (float *) src0_d; + float * src1_p = (float *) src1_d; + + if (!src1) { + src0_p += swapped ? nc : 0; + src1_p += swapped ? 
0 : nc; + } + + kernel_invoker(src0_p, + src1_p, + (float *) dst_d, + ggml_nelements(dst), + nc, + src0_o / sizeof(float), + src1_o / sizeof(float), + main_stream, + std::forward(args)...); break; } default: @@ -713,32 +554,41 @@ inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) } } - -inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +template +static inline void dispatch_ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... args) { #if defined (GGML_SYCL_F16) GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); - #else GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->type == GGML_TYPE_F32); #endif GGML_ASSERT(dst->src[0]->type == dst->type); + dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); + + const float sf0 = (float) dst->ne[0] / dst->src[0]->ne[0]; + const float sf1 = (float) dst->ne[1] / dst->src[0]->ne[1]; + const float sf2 = (float) dst->ne[2] / dst->src[0]->ne[2]; + const float sf3 = (float) dst->ne[3] / dst->src[0]->ne[3]; switch (dst->type) { #if defined (GGML_SYCL_F16) case GGML_TYPE_F16: { auto data_pts = cast_data(dst); - elu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + kernel_invoker(data_pts.src, data_pts.dst, (int)dst->src[0]->nb[0], (int)dst->src[0]->nb[1], (int)dst->src[0]->nb[2], + (int)dst->src[0]->nb[3], (int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], sf0, sf1, sf2, sf3, + main_stream, std::forward(args)...); break; } #endif case GGML_TYPE_F32: { auto data_pts = cast_data(dst); - elu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + kernel_invoker(data_pts.src, data_pts.dst, (int)dst->src[0]->nb[0], (int)dst->src[0]->nb[1], (int)dst->src[0]->nb[2], + (int)dst->src[0]->nb[3], 
(int)dst->ne[0], (int)dst->ne[1], (int)dst->ne[2], (int)dst->ne[3], sf0, sf1, sf2, sf3, + main_stream, std::forward(args)...); break; } default: @@ -746,7 +596,8 @@ inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) } } -inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { +template +static inline void dispatch_ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst, KernelInvoker kernel_invoker, Args&&... args) { #if defined (GGML_SYCL_F16) GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); @@ -755,6 +606,7 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst GGML_ASSERT(dst->type == GGML_TYPE_F32); #endif GGML_ASSERT(dst->src[0]->type == dst->type); + GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors dpct::queue_ptr main_stream = ctx.stream(); SYCL_CHECK(ggml_sycl_set_device(ctx.device)); switch (dst->type) { @@ -762,14 +614,16 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst case GGML_TYPE_F16: { auto data_pts = cast_data(dst); - silu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + kernel_invoker(data_pts.src, data_pts.dst, (int)dst->src[0]->ne[0], (int)dst->src[0]->ne[1], (int)dst->src[0]->ne[2], (int)dst->ne[0], + (int)dst->ne[1], (int)dst->ne[2], main_stream, std::forward(args)...); break; } #endif case GGML_TYPE_F32: { auto data_pts = cast_data(dst); - silu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); + kernel_invoker(data_pts.src, data_pts.dst, (int)dst->src[0]->ne[0], (int)dst->src[0]->ne[1], (int)dst->src[0]->ne[2], (int)dst->ne[0], + (int)dst->ne[1], (int)dst->ne[2], main_stream, std::forward(args)...); break; } default: @@ -777,655 +631,320 @@ inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst } } 
-inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - gelu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - gelu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +} // namespace ggml_sycl_detail + + + +static inline void ggml_sycl_op_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, 256); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), + sycl::range<1>(256)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_sgn_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - 
SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - gelu_quick_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - gelu_quick_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - gelu_erf_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - - -inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - 
SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - tanh_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - tanh_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - - 
switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - hardsigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - hardsigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - hardswish_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - hardswish_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined 
(GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - exp_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - exp_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - log_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - log_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = 
cast_data(dst); - sigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - sigmoid_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } -} - -inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - sqrt_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - sqrt_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, 256); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), + sycl::range<1>(256)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_abs_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || 
dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - sin_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - sin_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, 256); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256), + sycl::range<1>(256)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_elu_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - cos_sycl(data_pts.src, data_pts.dst, 
ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - cos_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_silu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_SILU_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SILU_BLOCK_SIZE), + sycl::range<1>(SYCL_SILU_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_silu_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - step_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - step_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_gelu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int 
k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_GELU_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_GELU_BLOCK_SIZE), + sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_gelu_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - neg_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - neg_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_gelu_quick(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_GELU_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_GELU_BLOCK_SIZE), + sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_gelu_quick_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined 
(GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif +static inline void ggml_sycl_op_gelu_erf(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_GELU_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_GELU_BLOCK_SIZE), + sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_gelu_erf_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} - GGML_ASSERT(dst->src[0]->type == dst->type); - float negative_slope; - memcpy(&negative_slope, dst->op_params, sizeof(float)); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - leaky_relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), negative_slope, main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - leaky_relu_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), negative_slope, main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_tanh(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_TANH_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_TANH_BLOCK_SIZE), + 
sycl::range<1>(SYCL_TANH_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_tanh_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { - #if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - sqr_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - sqr_sycl(data_pts.src, data_pts.dst, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_RELU_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_RELU_BLOCK_SIZE), + sycl::range<1>(SYCL_RELU_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_relu_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == 
GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); +static inline void ggml_sycl_op_hardsigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_HARDSIGMOID_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_HARDSIGMOID_BLOCK_SIZE), + sycl::range<1>(SYCL_HARDSIGMOID_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_hardsigmoid_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); +static inline void ggml_sycl_op_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_HARDSWISH_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_HARDSWISH_BLOCK_SIZE), + sycl::range<1>(SYCL_HARDSWISH_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_hardswish_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} - const float sf0 = (float) dst->ne[0] / dst->src[0]->ne[0]; - const float sf1 = (float) dst->ne[1] / dst->src[0]->ne[1]; - const float sf2 = (float) dst->ne[2] / dst->src[0]->ne[2]; - const float sf3 = (float) dst->ne[3] / dst->src[0]->ne[3]; - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - upscale_sycl(data_pts.src, data_pts.dst, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2], - dst->src[0]->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, - main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - 
auto data_pts = cast_data(dst); - upscale_sycl(data_pts.src, data_pts.dst, dst->src[0]->nb[0], dst->src[0]->nb[1], dst->src[0]->nb[2], - dst->src[0]->nb[3], dst->ne[0], dst->ne[1], dst->ne[2], dst->ne[3], sf0, sf1, sf2, sf3, - main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_EXP_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_EXP_BLOCK_SIZE), + sycl::range<1>(SYCL_EXP_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_exp_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined (GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - GGML_ASSERT(dst->src[0]->ne[3] == 1 && dst->ne[3] == 1); // just 3D tensors - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - switch (dst->type) { -#if defined (GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - pad_sycl(data_pts.src, data_pts.dst, dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2], dst->ne[0], - dst->ne[1], dst->ne[2], main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - pad_sycl(data_pts.src, data_pts.dst, dst->src[0]->ne[0], dst->src[0]->ne[1], dst->src[0]->ne[2], dst->ne[0], - dst->ne[1], dst->ne[2], main_stream); - break; - } - default: - 
GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_EXP_BLOCK_SIZE); // Using EXP block size + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_EXP_BLOCK_SIZE), + sycl::range<1>(SYCL_EXP_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_log_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { -#if defined(GGML_SYCL_F16) - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32 || dst->src[0]->type == GGML_TYPE_F16); - GGML_ASSERT(dst->type == GGML_TYPE_F32 || dst->type == GGML_TYPE_F16); -#else +static inline void ggml_sycl_op_neg(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_NEG_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_NEG_BLOCK_SIZE), + sycl::range<1>(SYCL_NEG_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_neg_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} - GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); - GGML_ASSERT(dst->type == GGML_TYPE_F32); -#endif - GGML_ASSERT(dst->src[0]->type == dst->type); - dpct::queue_ptr main_stream = ctx.stream(); - SYCL_CHECK(ggml_sycl_set_device(ctx.device)); - float min; - float max; - memcpy(&min, dst->op_params, sizeof(float)); - memcpy(&max, (float *) dst->op_params + 1, sizeof(float)); +static inline void ggml_sycl_op_step(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + 
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_NEG_BLOCK_SIZE); // Using NEG block size + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_NEG_BLOCK_SIZE), + sycl::range<1>(SYCL_NEG_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_step_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} - switch (dst->type) { -#if defined(GGML_SYCL_F16) - case GGML_TYPE_F16: - { - auto data_pts = cast_data(dst); - clamp_sycl(data_pts.src, data_pts.dst, min, max, ggml_nelements(dst->src[0]), main_stream); - break; - } -#endif - case GGML_TYPE_F32: - { - auto data_pts = cast_data(dst); - clamp_sycl(data_pts.src, data_pts.dst, min, max, ggml_nelements(dst->src[0]), main_stream); - break; - } - default: - GGML_ABORT("GGML tensor type not supported!\n"); - } +static inline void ggml_sycl_op_sigmoid(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_SIGMOID_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIGMOID_BLOCK_SIZE), + sycl::range<1>(SYCL_SIGMOID_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_sigmoid_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_SQRT_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SQRT_BLOCK_SIZE), + sycl::range<1>(SYCL_SQRT_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_sqrt_kernel(src, dst_ptr, k_elements, 
item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_SIN_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIN_BLOCK_SIZE), + sycl::range<1>(SYCL_SIN_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_sin_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); } -inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { +static inline void ggml_sycl_op_cos(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_SIN_BLOCK_SIZE); // Using SIN block size + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SIN_BLOCK_SIZE), + sycl::range<1>(SYCL_SIN_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_cos_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_leaky_relu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + float negative_slope; + memcpy(&negative_slope, dst->op_params, sizeof(float)); + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream, float slope) { + const int num_blocks = ceil_div(k_elements, SYCL_RELU_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_RELU_BLOCK_SIZE), + sycl::range<1>(SYCL_RELU_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_leaky_relu_kernel(src, dst_ptr, k_elements, slope, item_ct1); + }); + }, negative_slope); +} + +static inline void ggml_sycl_op_sqr(ggml_backend_sycl_context & 
ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) { + const int num_blocks = ceil_div(k_elements, SYCL_SQR_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_SQR_BLOCK_SIZE), + sycl::range<1>(SYCL_SQR_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + unary_op_sqr_kernel(src, dst_ptr, k_elements, item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_upscale(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_upscale(ctx, dst, + [](const auto* src, auto* dst_ptr, int nb00, int nb01, int nb02, int nb03, + int ne10, int ne11, int ne12, int ne13, float sf0, float sf1, float sf2, float sf3, + queue_ptr stream) { + ggml_sycl_detail::upscale_sycl(src, dst_ptr, nb00, nb01, nb02, nb03, ne10, ne11, ne12, ne13, sf0, sf1, sf2, sf3, stream); + }); +} + +static inline void ggml_sycl_op_pad(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_pad(ctx, dst, + [](const auto* src, auto* dst_ptr, int ne00, int ne01, int ne02, int ne0, int ne1, int ne2, + queue_ptr stream) { + ggml_sycl_detail::pad_sycl(src, dst_ptr, ne00, ne01, ne02, ne0, ne1, ne2, stream); + }); +} +static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + float min_val; + float max_val; + memcpy(&min_val, dst->op_params, sizeof(float)); + memcpy(&max_val, (float *) dst->op_params + 1, sizeof(float)); + ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst, + [](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream, float min_arg, float max_arg) { + const int num_blocks = ceil_div(k_elements, SYCL_CLAMP_BLOCK_SIZE); + sycl_parallel_for(stream, + sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE), + sycl::range<1>(SYCL_CLAMP_BLOCK_SIZE)), + [=](sycl::nd_item<1> item_ct1) { + 
clamp(src, dst_ptr, min_arg, max_arg, k_elements, item_ct1); + }); + }, min_val, max_val); +} + +static inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) { GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); GGML_ASSERT(dst->src[1]->type == GGML_TYPE_F32); GGML_ASSERT( dst->type == GGML_TYPE_F32); @@ -1441,7 +960,62 @@ inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) // int nb3 = dst->op_params[2] / 4; // 4 bytes of float32 - unused int offset = dst->op_params[3] / 4; // offset in bytes - acc_f32_sycl(src0_dd, src1_dd, dst_dd, ggml_nelements(dst), dst->src[1]->ne[0], dst->src[1]->ne[1], dst->src[1]->ne[2], nb1, nb2, offset, main_stream); + ggml_sycl_detail::acc_f32_sycl(src0_dd, src1_dd, dst_dd, (int)ggml_nelements(dst), (int)dst->src[1]->ne[0], (int)dst->src[1]->ne[1], (int)dst->src[1]->ne[2], nb1, nb2, offset, main_stream); +} + +static inline void ggml_sycl_op_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst, + [](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) { + const uint32_t num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE); + sycl_parallel_for(main_stream, + sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) { + gated_op_fused_geglu(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_reglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst, + [](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) { + const uint32_t num_blocks = ceil_div((uint32_t)k, SYCL_RELU_BLOCK_SIZE); // Using RELU block size for reglu + sycl_parallel_for(main_stream, + sycl::nd_range<1>((num_blocks * 
sycl::range<1>(SYCL_RELU_BLOCK_SIZE)), sycl::range<1>(SYCL_RELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) { + gated_op_fused_reglu(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst, + [](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) { + const uint32_t num_blocks = ceil_div((uint32_t)k, SYCL_SILU_BLOCK_SIZE); // Using SILU block size for swiglu + sycl_parallel_for(main_stream, + sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_SILU_BLOCK_SIZE)), sycl::range<1>(SYCL_SILU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) { + gated_op_fused_swiglu(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst, + [](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) { + const uint32_t num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE); + sycl_parallel_for(main_stream, + sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), [=](sycl::nd_item<1> item_ct1) { + gated_op_fused_geglu_erf(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1); + }); + }); +} + +static inline void ggml_sycl_op_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + ggml_sycl_detail::dispatch_ggml_sycl_op_fused_glu(ctx, dst, + [](const auto* x_ptr, const auto* g_ptr, auto* dst_ptr, uint64_t k, uint64_t n, uint64_t o0, uint64_t o1, queue_ptr main_stream) { + const uint32_t num_blocks = ceil_div(k, SYCL_GELU_BLOCK_SIZE); + sycl_parallel_for(main_stream, + sycl::nd_range<1>((num_blocks * sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), sycl::range<1>(SYCL_GELU_BLOCK_SIZE)), 
[=](sycl::nd_item<1> item_ct1) { + gated_op_fused_geglu_quick(x_ptr, g_ptr, dst_ptr, k, n, o0, o1, item_ct1); + }); + }); } @@ -1569,3 +1143,28 @@ void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); ggml_sycl_op_elu(ctx, dst); } + +void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_geglu(ctx, dst); +} + +void ggml_sycl_reglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_reglu(ctx, dst); +} + +void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_swiglu(ctx, dst); +} + +void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_geglu_erf(ctx, dst); +} + +void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1); + ggml_sycl_op_geglu_quick(ctx, dst); +} diff --git a/ggml/src/ggml-sycl/element_wise.hpp b/ggml/src/ggml-sycl/element_wise.hpp index bd40113f09705..50749e87d783e 100644 --- a/ggml/src/ggml-sycl/element_wise.hpp +++ b/ggml/src/ggml-sycl/element_wise.hpp @@ -3,27 +3,30 @@ #include "common.hpp" #include "ggml.h" -#include +#include // For std::numeric_limits template T neg_infinity() { return -std::numeric_limits::infinity(); } -template +template struct typed_data { - const T * src; - T * dst; + const T_Src * src; + T_Dst * dst; }; -template -typed_data cast_data(ggml_tensor * dst) { +template +typed_data cast_data(ggml_tensor * dst) { return { - /* .src = */ static_cast(dst->src[0]->data), - /* .dst = */ static_cast(dst->data) + /* .src = */ static_cast(dst->src[0]->data), + 
/* .dst = */ static_cast(dst->data) }; } +const float GELU_QUICK_COEF = -1.702f; + + void ggml_sycl_sqrt(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_sin(ggml_backend_sycl_context & ctx, ggml_tensor * dst); @@ -73,5 +76,11 @@ void ggml_sycl_sgn(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_abs(ggml_backend_sycl_context & ctx, ggml_tensor * dst); void ggml_sycl_elu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); -#endif // GGML_SYCL_ELEMENTWISE_HPP +void ggml_sycl_geglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_reglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_swiglu(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_geglu_erf(ggml_backend_sycl_context & ctx, ggml_tensor * dst); +void ggml_sycl_geglu_quick(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + +#endif // GGML_SYCL_ELEMENTWISE_HPP diff --git a/ggml/src/ggml-sycl/gemm.hpp b/ggml/src/ggml-sycl/gemm.hpp index 5efe03d364b1b..dcf6c7aeeb4ad 100644 --- a/ggml/src/ggml-sycl/gemm.hpp +++ b/ggml/src/ggml-sycl/gemm.hpp @@ -32,39 +32,28 @@ class DnnlGemmWrapper { else static_assert(0); } - // matrix A has m rows, k columns - // matrix B has k rows, n columns - // nra - number of elements to skip when moving into next row in A - // nrb - number of elements to skip when moving into next row in B - // nca - number of elements to skip when moving into next column in A - // ncb - number of elements to skip when moving into next column in B - // stride_a - number of elements to skip when moving to next A matrix - // stride_b - number of elements to skip when moving to next B matrix - // batches_a - number of A matrices - // batches_b - number of B matrices static void gemm(ggml_backend_sycl_context & ctx, int m, int n, int k, - const void * a, dt at, dnnl_dim_t nra, dnnl_dim_t nca, dnnl_dim_t stride_a, - const void * b, dt bt, dnnl_dim_t nrb, dnnl_dim_t ncb, dnnl_dim_t stride_b, + const void * a, dt 
at, dnnl_dim_t stra0, dnnl_dim_t stra1, dnnl_dim_t stra2, + const void * b, dt bt, dnnl_dim_t strb0, dnnl_dim_t strb1, dnnl_dim_t strb2, void * c, dt ct, const queue_ptr & q, dnnl_dim_t batches_a, dnnl_dim_t batches_b) { auto stream = ctx.stream_dnnl(q); auto eng = ctx.engine_dnnl(q); - // { # strides, # rows, # columns } - dnnl::memory::dims a_dims = { batches_a, m, k }; - dnnl::memory::dims b_dims = { batches_b, k, n }; - dnnl::memory::dims c_dims = { std::max(batches_a, batches_b), m, n }; - - // { # elements to skip to next stride, # elements to skip to next row, # elements to skip to next column } - dnnl::memory::dims a_strides = { stride_a, nra, nca }; - dnnl::memory::dims b_strides = { stride_b, nrb, ncb }; - + dnnl::memory::dims a_dims = {batches_a, m, k }; + dnnl::memory::dims a_strides = {stra2, stra1, stra0}; const auto a_in_md = dnnl::memory::desc(a_dims, at, a_strides); + + dnnl::memory::dims b_dims = {batches_b, k, n }; + dnnl::memory::dims b_strides = {strb2, strb0, strb1}; const auto b_in_md = dnnl::memory::desc(b_dims, bt, b_strides); - const auto c_md = dnnl::memory::desc(c_dims, ct, tag::abc); + dnnl::memory::dims c_dims = { std::max(batches_a, batches_b), m, n}; + dnnl::memory::dims c_strides = {m*n, 1, m }; + const auto c_md = dnnl::memory::desc(c_dims, ct, c_strides); dnnl::primitive_attr primitive_attr; primitive_attr.set_scratchpad_mode(dnnl::scratchpad_mode::user); + #ifdef GGML_SYCL_F16 primitive_attr.set_fpmath_mode(dnnl::fpmath_mode::f16); #endif @@ -76,24 +65,23 @@ class DnnlGemmWrapper { auto scratchpad_md = matmul_pd.scratchpad_desc(); auto scratchpad_mem = ctx.get_scratchpad_mem(scratchpad_md, eng, q); + auto matmul_prim = dnnl::matmul(matmul_pd); std::unordered_map matmul_args; matmul_args.insert({ DNNL_ARG_SRC, a_mem }); matmul_args.insert({ DNNL_ARG_WEIGHTS, b_mem }); + matmul_args.insert({ DNNL_ARG_DST, c_mem }); matmul_args.insert({ DNNL_ARG_SCRATCHPAD, scratchpad_mem }); matmul_prim.execute(stream, matmul_args); } - // matrices 
A and B are column major, both having k rows - // matrix A has m column, matrix B has n columns - // output: column major matrix C = A transposed * B static void row_gemm(ggml_backend_sycl_context & ctx, int m, int n, int k, const void * a, dt at, const void * b, dt bt, void * c, dt ct, const queue_ptr & q) { - gemm(ctx, m, n, k, a, at, k, 1, k * m, b, bt, 1, k, n * k, c, ct, q, 1, 1); + gemm(ctx, m, n, k, a, at, 1, k, k * m, b, bt, 1, k, n * k, c, ct, q, 1, 1); } }; diff --git a/ggml/src/ggml-sycl/getrows.cpp b/ggml/src/ggml-sycl/getrows.cpp index 4a7712781364e..9c76ffeb9508a 100644 --- a/ggml/src/ggml-sycl/getrows.cpp +++ b/ggml/src/ggml-sycl/getrows.cpp @@ -60,54 +60,6 @@ static void k_get_rows( dst_row[iybs + iqs + y_offset] = v.y(); } -template -static void k_get_rows_reorder( - const void * src0, const void *src0_dq, const int32_t * src1, dst_t * dst, - int64_t ne00, /*int64_t ne01, int64_t ne02, int64_t ne03,*/ - /*int64_t ne10, int64_t ne11,*/ int64_t ne12, /*int64_t ne13,*/ - /*size_t s0,*/ size_t s1, size_t s2, size_t s3, - /*size_t nb00,*/ size_t nb01, size_t nb02, size_t nb03, - size_t s10, size_t s11, size_t s12, - const sycl::nd_item<3> &item_ct1/*, size_t s13*/) { - - const int i00 = (item_ct1.get_group(2) * item_ct1.get_local_range(2) + - item_ct1.get_local_id(2)) * - 2; - const int i10 = item_ct1.get_local_range(1) * item_ct1.get_group(1) + - item_ct1.get_local_id(1); - const int i11 = (item_ct1.get_group(0) * item_ct1.get_local_range(0) + - item_ct1.get_local_id(0)) / - ne12; - const int i12 = (item_ct1.get_group(0) * item_ct1.get_local_range(0) + - item_ct1.get_local_id(0)) % - ne12; - - if (i00 >= ne00) { - return; - } - auto ncols = ne00; - const int i01 = src1[i10*s10 + i11*s11 + i12*s12]; - - dst_t * dst_row = dst + i10*s1 + i11*s2 + i12*s3; - - const int src0_off = i01 * ncols + i00; - const int ib = src0_off / QK4_0; // block index - const int iqs = (i00%qk)/qr; // x quant index - const int iybs = i00 - i00%qk; // dst block start index - 
const int y_offset = qr == 1 ? 1 : qk/2; - - // dequantize - dfloat2 v; - dequantize_kernel_recorder((const void *)src0_dq, ib, (const void *)src0, src0_off/2, v); - - dst_row[iybs + iqs + 0] = v.x(); - dst_row[iybs + iqs + y_offset] = v.y(); - - GGML_UNUSED(nb01); - GGML_UNUSED(nb02); - GGML_UNUSED(nb03); -} - template static void k_get_rows_float( const src0_t * src0, const int32_t * src1, dst_t * dst, @@ -166,58 +118,15 @@ static void get_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor *sr GGML_ASSERT(ne00 % 2 == 0); - stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { - k_get_rows( - src0_dd, src1_dd, dst_dd, ne00, ne12, s1, s2, - s3, nb01, nb02, nb03, s10, s11, s12, item_ct1); - }); - - GGML_UNUSED(dst); - GGML_UNUSED(ctx); -} - -template -static void get_rows_sycl_reorder(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, - ggml_tensor *dst, const void *src0_dd, - const int32_t *src1_dd, float *dst_dd, - queue_ptr stream) { - - GGML_TENSOR_BINARY_OP_LOCALS - - const sycl::range<3> block_dims(1, 1, SYCL_GET_ROWS_BLOCK_SIZE); - const int block_num_x = (ne00 + 2*SYCL_GET_ROWS_BLOCK_SIZE - 1) / (2*SYCL_GET_ROWS_BLOCK_SIZE); - const sycl::range<3> block_nums(ne11 * ne12, ne10, block_num_x); - - // strides in elements - //const size_t s0 = nb0 / ggml_element_size(dst); - const size_t s1 = nb1 / ggml_element_size(dst); - const size_t s2 = nb2 / ggml_element_size(dst); - const size_t s3 = nb3 / ggml_element_size(dst); - - const size_t s10 = nb10 / ggml_element_size(src1); - const size_t s11 = nb11 / ggml_element_size(src1); - const size_t s12 = nb12 / ggml_element_size(src1); - //const size_t s13 = nb13 / ggml_element_size(src1); - - GGML_ASSERT(ne00 % 2 == 0); - - const uint8_t* src0_q = (const uint8_t*)src0_dd; - const size_t ncols = ne00; - const size_t nrows = ne01; - const sycl::half* src0_dq = (const sycl::half*)(src0_q + nrows * ncols / 2); - 
stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]]{ - k_get_rows_reorder( - src0_dd, src0_dq, src1_dd, dst_dd, ne00, ne12, s1, s2, - s3, nb01, nb02, nb03, s10, s11, s12, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { + k_get_rows(src0_dd, src1_dd, dst_dd, ne00, ne12, s1, s2, s3, nb01, nb02, nb03, s10, s11, s12, + item_ct1); + }); GGML_UNUSED(dst); GGML_UNUSED(ctx); } - template static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tensor *src0, const ggml_tensor *src1, ggml_tensor *dst, @@ -245,9 +154,8 @@ static void get_rows_sycl_float(ggml_backend_sycl_context & ctx, const ggml_tens dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { k_get_rows_float(src0_dd, src1_dd, dst_dd, ne00, ne12, s1, s2, s3, nb01, nb02, nb03, s10, s11, s12, item_ct1); }); @@ -277,13 +185,8 @@ void ggml_sycl_op_get_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { src1_i32, (float *)dst->data, ctx.stream()); break; case GGML_TYPE_Q4_0: - if (ctx.opt_feature.reorder && dst->op == GGML_OP_MUL_MAT) { - get_rows_sycl_reorder(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, - src1_i32, (float *)dst->data, ctx.stream()); - } else { - get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, - src1_i32, (float *)dst->data, ctx.stream()); - } + get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, + src1_i32, (float *)dst->data, ctx.stream()); break; case GGML_TYPE_Q4_1: get_rows_sycl(ctx, dst->src[0], dst->src[1], dst, (const float *)dst->src[0]->data, 
diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 4b7610362b608..872eb4b052db9 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -41,6 +41,7 @@ #include "ggml-sycl/element_wise.hpp" #include "ggml-sycl/presets.hpp" #include "ggml-sycl/gemm.hpp" +#include "ggml-sycl/set_rows.hpp" #include "ggml-sycl/sycl_hw.hpp" #include "ggml-sycl/getrows.hpp" #include "ggml.h" @@ -83,9 +84,7 @@ static ggml_sycl_device_info ggml_sycl_init() { info.devices[i].cc = 100 * prop.get_major_version() + 10 * prop.get_minor_version(); - info.devices[i].hw_info = get_device_hw_info(&device); - info.devices[i].opt_feature = check_gpu_optimize_feature(info.devices[i].hw_info.arch); - + info.devices[i].opt_feature.reorder = device.ext_oneapi_architecture_is(syclex::arch_category::intel_gpu); info.max_work_group_sizes[i] = prop.get_max_work_group_size(); } @@ -195,7 +194,7 @@ static void ggml_check_sycl() try { if (!initialized) { g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0); - g_ggml_sycl_disable_optimize= get_sycl_env("GGML_SYCL_DISABLE_OPT", 1); + g_ggml_sycl_disable_optimize = get_sycl_env("GGML_SYCL_DISABLE_OPT", 0); g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1); g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0); g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0); @@ -1547,7 +1546,7 @@ static void mul_mat_p021_f16_f32( static void mul_mat_vec_nc_f16_f32( // nc == non-contiguous const void * __restrict__ vx, const float * __restrict__ y, float * __restrict__ dst, const int ncols_x, const int nrows_x, - const int row_stride_x, const int channel_stride_x, const int channel_x_divisor, + const int row_stride_x, const int channel_stride_x,const int channel_stride_y, const int channel_x_divisor, const sycl::nd_item<3> &item_ct1) { const sycl::half *x = (const sycl::half *)vx; @@ -1558,7 +1557,6 @@ static void mul_mat_vec_nc_f16_f32( // nc == 
non-contiguous item_ct1.get_local_id(0); const int channel_x = channel / channel_x_divisor; - const int nrows_y = ncols_x; const int nrows_dst = nrows_x; const int row_dst = row_x; @@ -1577,7 +1575,7 @@ static void mul_mat_vec_nc_f16_f32( // nc == non-contiguous const int row_y = col_x; const int ix = channel_x*channel_stride_x + row_x*row_stride_x + col_x; - const int iy = channel*nrows_y + row_y; + const int iy = channel * channel_stride_y + row_y; const float xi = sycl::vec(x[ix]) @@ -1697,7 +1695,7 @@ static void diag_mask_inf_f32(const float * x, float * dst, const int ncols, con dst[i] = x[i] - (col > n_past + row % rows_per_channel) * FLT_MAX; } -static void scale_f32(const float * x, float * dst, const float scale, const int k, +static void scale_f32(const float * x, float * dst, const float scale, const float bias, const int k, const sycl::nd_item<3> &item_ct1) { const int i = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); @@ -1706,7 +1704,7 @@ static void scale_f32(const float * x, float * dst, const float scale, const int return; } - dst[i] = scale * x[i]; + dst[i] = scale * x[i] + bias; } @@ -1824,7 +1822,7 @@ static void ggml_mul_mat_p021_f16_f32_sycl(const void *vx, const float *y, static void ggml_mul_mat_vec_nc_f16_f32_sycl( const void *vx, const float *y, float *dst, const int ncols_x, const int nrows_x, const int row_stride_x, const int nchannels_x, - const int nchannels_y, const int channel_stride_x, queue_ptr stream) { + const int nchannels_y, const int channel_stride_x, const int channel_stride_y, queue_ptr stream) { const sycl::range<3> block_nums(nchannels_y, nrows_x, 1); const sycl::range<3> block_dims(1, 1, WARP_SIZE); @@ -1836,7 +1834,7 @@ static void ggml_mul_mat_vec_nc_f16_f32_sycl( sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { mul_mat_vec_nc_f16_f32(vx, y, dst, ncols_x, nrows_x, - row_stride_x, channel_stride_x, + 
row_stride_x, channel_stride_x, channel_stride_y, nchannels_y / nchannels_x, item_ct1); }); } @@ -1844,7 +1842,7 @@ static void ggml_mul_mat_vec_nc_f16_f32_sycl( -static void scale_f32_sycl(const float *x, float *dst, const float scale, +static void scale_f32_sycl(const float *x, float *dst, const float scale, const float bias, const int k, queue_ptr stream) { const int num_blocks = (k + SYCL_SCALE_BLOCK_SIZE - 1) / SYCL_SCALE_BLOCK_SIZE; stream->parallel_for( @@ -1852,7 +1850,7 @@ static void scale_f32_sycl(const float *x, float *dst, const float scale, sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE), sycl::range<3>(1, 1, SYCL_SCALE_BLOCK_SIZE)), [=](sycl::nd_item<3> item_ct1) { - scale_f32(x, dst, scale, k, item_ct1); + scale_f32(x, dst, scale, bias, k, item_ct1); }); } @@ -1887,13 +1885,12 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols, const size_t shared_mem = ncols_pad * sizeof(int); if (order == GGML_SORT_ORDER_ASC) { - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor dpct_local_acc_ct1( sycl::range<1>(shared_mem), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { k_argsort_f32_i32( x, dst, ncols, ncols_pad, item_ct1, dpct_local_acc_ct1.get_multi_ptr() @@ -1901,13 +1898,12 @@ static void argsort_f32_i32_sycl(const float *x, int *dst, const int ncols, }); }); } else if (order == GGML_SORT_ORDER_DESC) { - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor dpct_local_acc_ct1( sycl::range<1>(shared_mem), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) 
{ k_argsort_f32_i32( x, dst, ncols, ncols_pad, item_ct1, dpct_local_acc_ct1.get_multi_ptr() @@ -1925,50 +1921,47 @@ static void argmax_f32_i32_sycl(const float *x, int *dst, const int ncols, const sycl::range<3> block_nums(1, nrows, 1); const size_t shared_mem = 256 * sizeof(float); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor shared_data( sycl::range<1>(shared_mem/sizeof(float)), cgh); sycl::local_accessor shared_indices( sycl::range<1>(shared_mem/sizeof(float)), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { - const int tid = item_ct1.get_local_id(2); - const int row = item_ct1.get_global_id(1); - - float max_val = -INFINITY; - int max_idx = -1; - - for (int col = tid; col < ncols; col += 256) { - float val = x[row * ncols + col]; - if (val > max_val) { - max_val = val; - max_idx = col; - } - } + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { + const int tid = item_ct1.get_local_id(2); + const int row = item_ct1.get_global_id(1); - shared_data[tid] = max_val; - shared_indices[tid] = max_idx; - item_ct1.barrier(sycl::access::fence_space::local_space); + float max_val = -INFINITY; + int max_idx = -1; - for (int stride = 256/2; stride > 0; stride >>= 1) { - if (tid < stride) { - float val1 = shared_data[tid]; - float val2 = shared_data[tid + stride]; - if (val2 > val1) { - shared_data[tid] = val2; - shared_indices[tid] = shared_indices[tid + stride]; - } - } - item_ct1.barrier(sycl::access::fence_space::local_space); + for (int col = tid; col < ncols; col += 256) { + float val = x[row * ncols + col]; + if (val > max_val) { + max_val = val; + max_idx = col; } + } + shared_data[tid] = max_val; + shared_indices[tid] = max_idx; + item_ct1.barrier(sycl::access::fence_space::local_space); - if (tid == 0) { - dst[row] = shared_indices[0]; + for (int stride = 256 / 2; 
stride > 0; stride >>= 1) { + if (tid < stride) { + float val1 = shared_data[tid]; + float val2 = shared_data[tid + stride]; + if (val2 > val1) { + shared_data[tid] = val2; + shared_indices[tid] = shared_indices[tid + stride]; + } } - }); + item_ct1.barrier(sycl::access::fence_space::local_space); + } + + if (tid == 0) { + dst[row] = shared_indices[0]; + } + }); }); } static void diag_mask_inf_f32_sycl(const float *x, float *dst, @@ -2130,8 +2123,8 @@ inline void ggml_sycl_op_mul_mat_sycl( #if GGML_SYCL_DNNL if (!g_ggml_sycl_disable_dnn) { - DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ptr, - DnnlGemmWrapper::to_dt(), src0_ptr, DnnlGemmWrapper::to_dt(), + DnnlGemmWrapper::row_gemm(ctx,row_diff, src1_ncols , ne10, src0_ptr, + DnnlGemmWrapper::to_dt(), src1_ptr, DnnlGemmWrapper::to_dt(), dst_dd_i, DnnlGemmWrapper::to_dt(), stream); } else @@ -2177,8 +2170,8 @@ inline void ggml_sycl_op_mul_mat_sycl( #if GGML_SYCL_DNNL if (!g_ggml_sycl_disable_dnn) { - DnnlGemmWrapper::row_gemm(ctx, src1_ncols, row_diff, ne10, src1_ddf1_i, - DnnlGemmWrapper::to_dt(), src0_ddf_i, DnnlGemmWrapper::to_dt(), + DnnlGemmWrapper::row_gemm(ctx, row_diff, src1_ncols, ne10, src0_ddf_i, + DnnlGemmWrapper::to_dt(), src1_ddf1_i, DnnlGemmWrapper::to_dt(), dst_dd_i, DnnlGemmWrapper::to_dt(), stream); } else @@ -2326,9 +2319,11 @@ inline void ggml_sycl_op_scale(ggml_backend_sycl_context & ctx, ggml_tensor * ds float * dst_dd = static_cast(dst->data); float scale; - memcpy(&scale, dst->op_params, sizeof(float)); + float bias; + memcpy(&scale, (float *) dst->op_params + 0, sizeof(float)); + memcpy(&bias, (float *) dst->op_params + 1, sizeof(float)); - scale_f32_sycl(src0_dd, dst_dd, scale, ggml_nelements(dst->src[0]), main_stream); + scale_f32_sycl(src0_dd, dst_dd, scale, bias, ggml_nelements(dst->src[0]), main_stream); /* DPCT1010:87: SYCL uses exceptions to report errors and does not use the error codes. The call was replaced with 0. You need to rewrite this code. 
@@ -2780,6 +2775,7 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml const int64_t nb02 = src0->nb[2]; const int64_t ne12 = src1->ne[2]; + const int64_t nb11 = src1->nb[1]; SYCL_CHECK(ggml_sycl_set_device(ctx.device)); queue_ptr main_stream = ctx.stream(); @@ -2790,8 +2786,9 @@ static void ggml_sycl_mul_mat_vec_nc(ggml_backend_sycl_context & ctx, const ggml const int64_t row_stride_x = nb01 / sizeof(sycl::half); const int64_t channel_stride_x = nb02 / sizeof(sycl::half); + const int64_t channel_stride_y = nb11 / sizeof(float); - ggml_mul_mat_vec_nc_f16_f32_sycl(src0_ddq, src1_ddf, dst_ddf, ne00, ne01, row_stride_x, ne02, ne12, channel_stride_x, main_stream); + ggml_mul_mat_vec_nc_f16_f32_sycl(src0_ddq, src1_ddf, dst_ddf, ne00, ne01, row_stride_x, ne02, ne12, channel_stride_x,channel_stride_y, main_stream); } catch (sycl::exception const &exc) { std::cerr << exc.what() << "Exception caught at file:" << __FILE__ @@ -2845,8 +2842,8 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons float * dst_ddf = static_cast(dst->data); const sycl::half * src1_f16 = static_cast(src1->data); + const size_t type_size_src0 = ggml_type_size(src0->type); const size_t type_size_src1 = ggml_type_size(src1->type); - GGML_ASSERT(nb10 == type_size_src1); // SRC1 strides int64_t s11 = nb11 / type_size_src1; @@ -2858,11 +2855,40 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons if (src1->type != GGML_TYPE_F16) { scope_op_debug_print scope_dbg_print(__func__, "/to_fp16_nc_sycl", dst, /*num_src=*/2, " : converting src1 to fp16"); - const to_fp16_nc_sycl_t to_fp16_nc_sycl = get_to_fp16_nc_sycl(src1->type); - GGML_ASSERT(to_fp16_nc_sycl != nullptr); + + // iterate tensor dims and find the slowest moving dim and stride + int64_t last_dim=0; + int64_t last_str=0; + int64_t largest_str=0; + for(int i = 0; i< 4; i++){ + // last stride is always the largest + if(src1->nb[i] == largest_str){ + 
if(src1->ne[last_dim] == 1){ + last_str = i; + last_dim = i; + } + } + if(src1->nb[i] > largest_str){ + largest_str = src1->nb[i]; + last_str = i; + last_dim = i; + } + + } +#if GGML_SYCL_DNNL + // oneDNN handles strided data and does not need overhead of get_to_fp16_nc_sycl + const int64_t ne_src1 = src1->nb[last_str] * src1->ne[last_dim] / type_size_src1; + src1_f16_alloc.alloc(ne_src1); + const to_fp16_sycl_t to_fp16_sycl = ggml_get_to_fp16_sycl(src1->type, dst); + GGML_ASSERT(to_fp16_sycl != nullptr); + to_fp16_sycl(src1_f16, src1_f16_alloc.get(), ne_src1, queue); +# else const int64_t ne_src1 = ggml_nelements(src1); src1_f16_alloc.alloc(ne_src1); + const to_fp16_nc_sycl_t to_fp16_nc_sycl = get_to_fp16_nc_sycl(src1->type); + GGML_ASSERT(to_fp16_nc_sycl != nullptr); to_fp16_nc_sycl(src1_f16, src1_f16_alloc.get(), ne10, ne11, ne12, ne13, s11, s12, s13, queue); +#endif src1_f16 = src1_f16_alloc.get(); s11 = ne10; @@ -2896,38 +2922,89 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons #if GGML_SYCL_DNNL if (!g_ggml_sycl_disable_dnn) { - auto dnn_gemm = [&ctx, queue, ne11, ne01, ne10, nb00, nb01, nb02, s11, s12] - (const sycl::half* src1, const sycl::half* src0, float* dst, const dnnl_dim_t batches_a, const dnnl_dim_t batches_b) { - - DnnlGemmWrapper::gemm(ctx, ne11,ne01, ne10, - src1, DnnlGemmWrapper::to_dt(), s11, 1, s12, - src0, DnnlGemmWrapper::to_dt(), 1, nb01/nb00, nb02/nb00, - dst, DnnlGemmWrapper::to_dt(), queue, batches_a, batches_b); - }; - - if (r2 == 1 && r3 == 1) { - if (ggml_is_contiguous_2(src0) && ggml_is_contiguous_2(src1)) { - dnn_gemm(src1_f16, src0_f16, dst_ddf, ne12*ne13, ne02 * ne03); - } - else { - for (int64_t ie03 = 0; ie03 < ne03; ++ie03) { - const sycl::half* src0_f16_shifted = src0_f16 + ((ie03*nb03)/sizeof(sycl::half)); // nb is in bytes - const sycl::half* src1_f16_shifted = src1_f16 + ie03*s13; - float* dst_shifted = dst_ddf + ((ie03*nb3)/sizeof(float)); - dnn_gemm(src1_f16_shifted, src0_f16_shifted, 
dst_shifted, ne12, ne02); + int64_t str_a0 = nb00 / type_size_src0; + int64_t str_a1 = nb01 / type_size_src0; + int64_t str_a2 = nb02 / type_size_src0; + + int64_t str_b0 = nb10 / type_size_src1; + int64_t str_b1 = nb11 / type_size_src1; + int64_t str_b2 = nb12 / type_size_src1; + + auto launch_gemm_for_batches = [&ctx, queue](const sycl::half *src0, + const sycl::half *src1, float *dst, + int64_t a0, int64_t a1, int64_t batcha, + int64_t b0, int64_t b1, int64_t batchb, + int64_t sa0, int64_t sa1, int64_t sa2, + int64_t sb0, int64_t sb1, int64_t sb2, + int64_t sd2) { + bool supported_broadcast = batchb == batcha ? true + : batchb == 1 || batcha == 1 ? true + : false; + if (supported_broadcast) { + DnnlGemmWrapper::gemm(ctx, a1, b1, a0, src0, + DnnlGemmWrapper::to_dt(), sa0, sa1, sa2, src1, + DnnlGemmWrapper::to_dt(), sb0, sb1, sb2, dst, + DnnlGemmWrapper::to_dt(), queue, batcha, batchb); + } else { + // iterate over batches from smaller set of matrices (matrix 0) + int64_t batches0 = batcha; + int64_t batches1 = batchb; + + if (batches0 > batches1) { + int64_t num_mul_mats = batches1; + int64_t sub_batch = batches0 / num_mul_mats; + // src0 is batched and bigger, shift and multiply with src1 + for (int64_t i0 = 0; i0 < num_mul_mats; i0++) { + const sycl::half *src0_shifted = src0 + (sa2 * i0 * sub_batch); + const sycl::half *src1_shifted = src1 + (sb2 * i0); + float *dst_shifted = dst + (sd2 * i0 * sub_batch); + DnnlGemmWrapper::gemm(ctx, a1, b1, a0, src0_shifted, + DnnlGemmWrapper::to_dt(), sa0, sa1, sa2, + src1_shifted, DnnlGemmWrapper::to_dt(), sb0, + sb1, sb2, dst_shifted, DnnlGemmWrapper::to_dt(), + queue, sub_batch, 1); + } + } else { + int64_t num_mul_mats = batches0; + int64_t sub_batch = batches1 / num_mul_mats; + // src1 is batched and bigger, shift and multiply with src0 + for (int64_t i1 = 0; i1 < num_mul_mats; i1++) { + const sycl::half *src0_shifted = src0 + (sa2 * i1); + const sycl::half *src1_shifted = src1 + (sb2 * i1 * sub_batch); + float 
*dst_shifted = dst + (sd2 * i1 * sub_batch); + DnnlGemmWrapper::gemm(ctx, a1, b1, a0, src0_shifted, + DnnlGemmWrapper::to_dt(), sa0, sa1, sa2, + src1_shifted, DnnlGemmWrapper::to_dt(), sb0, + sb1, sb2, dst_shifted, DnnlGemmWrapper::to_dt(), + queue, 1, sub_batch); + } + } } - } - } else { - // iterate over batches from smaller set of matrices (matrix 0) - for (int64_t ie02 = 0; ie02 < ne02; ++ie02) { - for (int64_t ie03 = 0; ie03 < ne03; ++ie03) { - const sycl::half* src0_f16_shifted = src0_f16 + ((ie02*nb02 + ie03*nb03)/sizeof(sycl::half)); - const sycl::half* src1_f16_shifted = src1_f16 + ie02*s12*r2 + ie03*s13*r3; - float* dst_shifted = dst_ddf + ((ie02*nb2*r2 + ie03*nb3*r3)/sizeof(float)); - dnn_gemm(src1_f16_shifted, src0_f16_shifted, dst_shifted, r2*r3, 1); + }; + + bool cont_batches_a = nb02 * ne02 == nb03; + bool cont_batches_b = nb12 * ne12 == nb13; + if (cont_batches_a && cont_batches_b) { + int64_t batches0 = ne02 * ne03; + int64_t batches1 = ne12 * ne13; + launch_gemm_for_batches(src0_f16, src1_f16, dst_ddf, ne00, ne01, batches0, + ne10, ne11, batches1, str_a0, str_a1, str_a2, str_b0, str_b1, + str_b2, nb2 / sizeof(float)); + } else { + for (int64_t b_a = 0; b_a < ne03; b_a++) { + const sycl::half *src0_f16_shifted + = src0_f16 + (nb03 * b_a / type_size_src0); + const sycl::half *src1_f16_shifted + = src1_f16 + (nb13 * b_a / type_size_src1); + float *dst_shifted = dst_ddf + (nb3 * b_a / sizeof(float)); + int64_t batches0 = ne02; + int64_t batches1 = ne12; + launch_gemm_for_batches(src0_f16_shifted, src1_f16_shifted, dst_shifted, + ne00, ne01, batches0, ne10, ne11, batches1, str_a0, str_a1, + str_a2, str_b0, str_b1, str_b2, nb2 / sizeof(float)); } } - } + } else #endif @@ -2952,7 +3029,7 @@ static void ggml_sycl_mul_mat_batched_sycl(ggml_backend_sycl_context & ctx, cons void ** ptrs_dst_get = ptrs_dst.get(); size_t nb12_scaled = src1->type == GGML_TYPE_F16 ? nb12 : s12 * sizeof(sycl::half); size_t nb13_scaled = src1->type == GGML_TYPE_F16 ? 
nb13 : s13 * sizeof(sycl::half); - cgh.parallel_for(sycl::nd_range<3>(block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { k_compute_batched_ptrs(src0_f16, src1_f16, dst_ddf, ptrs_src_get, ptrs_dst_get, ne12, ne13, ne23, nb02, nb03, nb12_scaled, nb13_scaled, nbd2, nbd3, r2, r3, item_ct1); }); @@ -3267,10 +3344,10 @@ static void ggml_sycl_mul_mat(ggml_backend_sycl_context & ctx, const ggml_tensor // The kernel from the if path is faster for that specific case, but does not support all mul mats. ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst); } - } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && ggml_is_contiguous(src1) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { + } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_contiguous(src0) && !ggml_is_transposed(src1) && src1->ne[1] == 1) { // KQV single-batch ggml_sycl_mul_mat_vec_nc(ctx, src0, src1, dst); - } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2]*src1->ne[3] > 1) { + } else if (!split && src0->type == GGML_TYPE_F16 && !ggml_is_transposed(src0) && !ggml_is_transposed(src1) && src1->ne[2] * src1->ne[3] > 1) { // KQ + KQV multi-batch ggml_sycl_mul_mat_batched_sycl(ctx, src0, src1, dst); } else if (use_dequantize_mul_mat_vec) { @@ -3453,10 +3530,13 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, SYCL_CHECK(CHECK_TRY_ERROR( stream->memset(dev_cur_src1_row.get(), 0, sizeof(int)))); + const unsigned int max_work_group_size = ggml_sycl_info().max_work_group_sizes[ctx.device]; + assert(work_group_size % (WARP_SIZE * WARP_SIZE) == 0); + { - sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne10, 768u)); + sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne10, max_work_group_size)); sycl::range<3> grid_dims(1, n_ids, ids->ne[1]); - stream->submit([&](sycl::handler 
&cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor src1_row_acc(cgh); char *__restrict src1_contiguous_get = @@ -3468,9 +3548,8 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, size_t ids_nb_ct6 = ids->nb[1]; size_t ids_nb_ct7 = ids->nb[0]; - cgh.parallel_for( - sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { k_copy_src1_to_contiguous( src1_original, src1_contiguous_get, dev_cur_src1_row_get, @@ -3499,17 +3578,16 @@ static void ggml_sycl_mul_mat_id(ggml_backend_sycl_context & ctx, ggml_sycl_mul_mat(ctx, &src0_row, &src1_row, &dst_row); { - sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne0, 768u)); + sycl::range<3> block_dims(1, 1, std::min((unsigned int)ne0, max_work_group_size)); sycl::range<3> grid_dims(1, 1, num_src1_rows); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { const char *__restrict dst_contiguous_get = dst_contiguous.get(); const mmid_row_mapping *__restrict dev_row_mapping_get = dev_row_mapping.get(); - cgh.parallel_for( - sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { k_copy_dst_from_contiguous(dst_original, dst_contiguous_get, dev_row_mapping_get, @@ -3612,6 +3690,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg case GGML_OP_GET_ROWS: ggml_sycl_get_rows(ctx, dst); break; + case GGML_OP_SET_ROWS: + ggml_sycl_op_set_rows(ctx, dst); + break; case GGML_OP_DUP: ggml_sycl_dup(ctx, dst); break; @@ -3685,6 +3766,27 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg return false; } break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(dst)) { + case GGML_GLU_OP_REGLU: + 
ggml_sycl_reglu(ctx, dst); + break; + case GGML_GLU_OP_GEGLU: + ggml_sycl_geglu(ctx, dst); + break; + case GGML_GLU_OP_SWIGLU: + ggml_sycl_swiglu(ctx, dst); + break; + case GGML_GLU_OP_GEGLU_ERF: + ggml_sycl_geglu_erf(ctx, dst); + break; + case GGML_GLU_OP_GEGLU_QUICK: + ggml_sycl_geglu_quick(ctx, dst); + break; + default: + return false; + } + break; case GGML_OP_NORM: ggml_sycl_norm(ctx, dst); break; @@ -4221,6 +4323,18 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g default: return false; } + case GGML_OP_GLU: + switch (ggml_get_glu_op(op)) { + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + return ggml_is_contiguous_1(op->src[0]); + default: + return false; + } + break; case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT_ID: { @@ -4269,6 +4383,13 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g return false; } } + case GGML_OP_SET_ROWS: + { + // TODO: add support + // ref: https://github.com/ggml-org/llama.cpp/pull/14274 +#pragma message("TODO: implement BF16, Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, IQ4_NL support (https://github.com/ggml-org/llama.cpp/pull/14661)") + return (op->type == GGML_TYPE_F32 || (op->type == GGML_TYPE_F16 && op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_I64)); + } break; case GGML_OP_CPY: { ggml_type src0_type = op->src[0]->type; @@ -4379,9 +4500,15 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g return true; case GGML_OP_CONT: return op->src[0]->type != GGML_TYPE_BF16; - case GGML_OP_DIAG_MASK_INF: case GGML_OP_SOFT_MAX: - return true; + // TODO: support batching + if (op->src[0]->ne[3] != 1) { + return false; + } + // TODO: support broadcast + // ref: https://github.com/ggml-org/llama.cpp/pull/14435 + return !op->src[1] || (op->src[1]->ne[2] == 1 && op->src[1]->ne[3] == 1); + case GGML_OP_DIAG_MASK_INF: case GGML_OP_ROPE: case 
GGML_OP_IM2COL: return true; diff --git a/ggml/src/ggml-sycl/gla.cpp b/ggml/src/ggml-sycl/gla.cpp index 879184fdd3111..b40cbf1f14fb2 100644 --- a/ggml/src/ggml-sycl/gla.cpp +++ b/ggml/src/ggml-sycl/gla.cpp @@ -11,13 +11,13 @@ static void gated_linear_attn_f32_kernel(const dpct::queue_ptr stream, u_int B, const u_int n_seq_tokens = T / B; sycl::range<1> block_dims((C / H)); sycl::range<1> grid_dims((B * H)); - stream->submit([&](sycl::handler & cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { /* local memory accessors*/ auto _k = sycl::local_accessor(sycl::range<1>(head_size), cgh); auto _r = sycl::local_accessor(sycl::range<1>(head_size), cgh); auto _td = sycl::local_accessor(sycl::range<1>(head_size), cgh); - cgh.parallel_for(sycl::nd_range<1>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<1> item) { + sycl_parallel_for<1>(cgh, sycl::nd_range<1>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<1> item) { u_int tid = item.get_local_id(0); u_int bid = item.get_group(0); diff --git a/ggml/src/ggml-sycl/im2col.cpp b/ggml/src/ggml-sycl/im2col.cpp index aa19c2527dc41..52737cc746dfa 100644 --- a/ggml/src/ggml-sycl/im2col.cpp +++ b/ggml/src/ggml-sycl/im2col.cpp @@ -70,7 +70,7 @@ static void im2col_sycl_internal(const float * x, T * dst, int64_t IW, int64_t I const int64_t CHW = IC * KH * KW; - stream->parallel_for(sycl::nd_range<3>(block_nums * local_range, local_range), [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * local_range, local_range), [=](sycl::nd_item<3> item_ct1) { im2col_kernel(x, dst, batch_offset, offset_delta, IC, IW, IH, OH, OW, KW, KH, parallel_elements, CHW, s0, s1, p0, p1, d0, d1, item_ct1); }); diff --git a/ggml/src/ggml-sycl/mmq.cpp b/ggml/src/ggml-sycl/mmq.cpp index ffb272aa28378..c72fcd38ebeff 100644 --- a/ggml/src/ggml-sycl/mmq.cpp +++ b/ggml/src/ggml-sycl/mmq.cpp @@ -1818,7 +1818,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, 
dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_qs_q4_0_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_d_q4_0_acc_ct1( @@ -1829,9 +1829,8 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q4_0( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -1853,7 +1852,7 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_qs_q4_0_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_d_q4_0_acc_ct1( @@ -1864,9 +1863,8 @@ static void ggml_mul_mat_q4_0_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q4_0( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -1933,7 +1931,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor 
tile_x_qs_q4_1_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + +mmq_y), cgh); sycl::local_accessor tile_x_dm_q4_1_acc_ct1( @@ -1944,9 +1942,8 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q4_1( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -1968,7 +1965,7 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_qs_q4_1_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + +mmq_y), cgh); sycl::local_accessor tile_x_dm_q4_1_acc_ct1( @@ -1979,9 +1976,8 @@ static void ggml_mul_mat_q4_1_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q4_1( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2048,7 +2044,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q5_0_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_d_q5_0_acc_ct1( @@ -2059,9 +2055,8 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, 
const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q5_0( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2083,7 +2078,7 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q5_0_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_d_q5_0_acc_ct1( @@ -2094,9 +2089,8 @@ static void ggml_mul_mat_q5_0_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q5_0( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2163,7 +2157,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q5_1_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q5_1_acc_ct1( @@ -2174,9 +2168,8 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - 
[=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q5_1( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2198,7 +2191,7 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q5_1_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q5_1_acc_ct1( @@ -2209,9 +2202,8 @@ static void ggml_mul_mat_q5_1_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q5_1( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2278,7 +2270,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_qs_q8_0_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_d_q8_0_acc_ct1( @@ -2289,9 +2281,8 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q8_0( vx, vy, dst, ncols_x, nrows_x, 
ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2313,7 +2304,7 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_qs_q8_0_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_d_q8_0_acc_ct1( @@ -2324,9 +2315,8 @@ static void ggml_mul_mat_q8_0_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q8_0( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2393,7 +2383,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q2_K_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q2_K_acc_ct1( @@ -2406,9 +2396,8 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q2_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2431,7 +2420,7 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), 
{sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q2_K_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q2_K_acc_ct1( @@ -2444,9 +2433,8 @@ static void ggml_mul_mat_q2_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q2_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2516,7 +2504,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q3_K_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q3_K_acc_ct1( @@ -2531,9 +2519,8 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q3_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2557,7 +2544,7 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q3_K_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + 
mmq_y), cgh); sycl::local_accessor tile_x_dm_q3_K_acc_ct1( @@ -2572,9 +2559,8 @@ static void ggml_mul_mat_q3_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q3_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2644,7 +2630,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q4_K_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q4_K_acc_ct1( @@ -2657,9 +2643,8 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q4_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2682,7 +2667,7 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q4_K_acc_ct1( sycl::range<1>(mmq_y * (WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q4_K_acc_ct1( @@ -2695,9 +2680,8 @@ static void ggml_mul_mat_q4_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( 
sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q4_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2765,7 +2749,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q5_K_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q5_K_acc_ct1( @@ -2778,9 +2762,8 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q5_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2803,7 +2786,7 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_q5_K_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_q5_K_acc_ct1( @@ -2816,9 +2799,8 @@ static void ggml_mul_mat_q5_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, 
sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q5_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2886,7 +2868,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_acc_ct1( @@ -2899,9 +2881,8 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q6_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, @@ -2924,7 +2905,7 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy, dpct::has_capability_or_fail(stream->get_device(), {sycl::aspect::fp16}); - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor tile_x_ql_acc_ct1( sycl::range<1>(mmq_y * (2 * WARP_SIZE) + mmq_y), cgh); sycl::local_accessor tile_x_dm_acc_ct1( @@ -2937,9 +2918,8 @@ static void ggml_mul_mat_q6_K_q8_1_sycl(const void *vx, const void *vy, sycl::local_accessor tile_y_ds_acc_ct1( sycl::range<1>(mmq_x * WARP_SIZE / QI8_1), cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { mul_mat_q6_K( vx, vy, dst, ncols_x, nrows_x, ncols_y, nrows_y, nrows_dst, item_ct1, diff --git a/ggml/src/ggml-sycl/mmvq.cpp 
b/ggml/src/ggml-sycl/mmvq.cpp index 5b7f064074937..c21929d51e94c 100644 --- a/ggml/src/ggml-sycl/mmvq.cpp +++ b/ggml/src/ggml-sycl/mmvq.cpp @@ -544,12 +544,12 @@ static void reorder_mul_mat_vec_q4_0_q8_1_sycl(const void * vx, const void * vy, const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, (block_num_y * WARP_SIZE)); const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE); - stream->submit([&](sycl::handler & cgh) { - cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size), - [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_reorder>(vx, vy, dst, ncols, nrows, - nd_item); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(global_size, workgroup_size), + [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_reorder>(vx, vy, dst, ncols, nrows, + nd_item); + }); }); } @@ -561,12 +561,12 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void * vx, const void * vy, float * const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - stream->submit([&](sycl::handler & cgh) { - cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -580,17 +580,12 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> 
item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -604,17 +599,12 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -628,17 +618,12 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -652,17 +637,12 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy, 
const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -676,17 +656,12 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -700,17 +675,12 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * 
block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -724,17 +694,12 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -750,12 +715,12 @@ static void reorder_mul_mat_vec_q4_k_q8_1_sycl(const void * vx, const void * vy, const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE); const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE); - stream->submit([&](sycl::handler & cgh) { - cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size), - [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_reorder>(vx, vy, dst, ncols, - nrows, nd_item); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(global_size, workgroup_size), + [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_reorder>(vx, vy, dst, ncols, nrows, + nd_item); + }); }); } @@ -769,17 +734,12 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( 
- sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -794,12 +754,12 @@ static void reorder_mul_mat_vec_q6_k_q8_1_sycl(const void * vx, const void * vy, const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE); const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE); - stream->submit([&](sycl::handler & cgh) { - cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size), - [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_reorder>(vx, vy, dst, ncols, nrows, - nd_item); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(global_size, workgroup_size), + [=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_reorder>(vx, vy, dst, ncols, nrows, + nd_item); + }); }); } static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy, @@ -811,17 +771,12 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) 
[[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q( + vx, vy, dst, ncols, nrows, item_ct1); + }); }); } } @@ -836,14 +791,12 @@ static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq2_xxs_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq2_xxs_q8_1(vx, vy, dst, ncols, + nrows, item_ct1); + }); }); } } @@ -857,14 +810,12 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - stream->submit([&](sycl::handler & cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq2_xs_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq2_xs_q8_1(vx, vy, dst, ncols, + nrows, item_ct1); + }); }); } } @@ -878,15 +829,12 @@ static void mul_mat_vec_iq2_s_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, 
block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq2_s_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq2_s_q8_1(vx, vy, dst, ncols, nrows, + item_ct1); + }); }); } } @@ -900,15 +848,12 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq3_xxs_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq3_xxs_q8_1(vx, vy, dst, ncols, + nrows, item_ct1); + }); }); } } @@ -922,15 +867,12 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq3_s_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq3_s_q8_1(vx, vy, dst, ncols, nrows, + item_ct1); + }); 
}); } } @@ -944,15 +886,12 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq1_s_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq1_s_q8_1(vx, vy, dst, ncols, nrows, + item_ct1); + }); }); } } @@ -966,14 +905,12 @@ static void mul_mat_vec_iq1_m_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq1_m_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq1_m_q8_1(vx, vy, dst, ncols, nrows, + item_ct1); + }); }); } } @@ -987,15 +924,12 @@ static void mul_mat_vec_iq4_nl_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq4_nl_q8_1( - 
vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq4_nl_q8_1(vx, vy, dst, ncols, nrows, + item_ct1); + }); }); } } @@ -1009,15 +943,12 @@ static void mul_mat_vec_iq4_xs_q8_1_sycl(const void *vx, const void *vy, const sycl::range<3> block_nums(1, 1, block_num_y); const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE); { - - stream->submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - mul_mat_vec_q_iq4_xs_q8_1( - vx, vy, dst, ncols, nrows, item_ct1); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + mul_mat_vec_q_iq4_xs_q8_1(vx, vy, dst, ncols, + nrows, item_ct1); + }); }); } } diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml/src/ggml-sycl/norm.cpp index 4ec1416849c7e..79d846b41a15d 100644 --- a/ggml/src/ggml-sycl/norm.cpp +++ b/ggml/src/ggml-sycl/norm.cpp @@ -254,14 +254,13 @@ static void norm_f32_sycl(const float * x, float * dst, const int ncols, const i GGML_ASSERT(ncols % WARP_SIZE == 0); if (ncols < 1024) { const sycl::range<3> block_dims(1, 1, WARP_SIZE); - stream->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl::nd_range<3>(global_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, nullptr, WARP_SIZE); - }); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(global_dims * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) 
[[sycl::reqd_sub_group_size(WARP_SIZE)]] { + norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, + nullptr, WARP_SIZE); + }); + }); } else { const int work_group_size = ggml_sycl_info().max_work_group_sizes[device]; @@ -272,16 +271,15 @@ static void norm_f32_sycl(const float * x, float * dst, const int ncols, const i the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. */ - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor s_sum_acc_ct1( sycl::range<1>(work_group_size / WARP_SIZE), cgh); - cgh.parallel_for( - sycl::nd_range<3>(global_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, get_pointer(s_sum_acc_ct1), work_group_size); - }); - }); + sycl_parallel_for(cgh, sycl::nd_range<3>(global_dims * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, + get_pointer(s_sum_acc_ct1), work_group_size); + }); + }); } } @@ -290,18 +288,14 @@ static void group_norm_f32_sycl(const float* x, float* dst, const int ne_elements, queue_ptr stream, int device) { if (group_size < 1024) { const sycl::range<3> block_dims(1, 1, WARP_SIZE); - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { const float eps_ct4 = eps; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_groups) * block_dims, - block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - group_norm_f32( - x, dst, group_size, ne_elements, eps_ct4, item_ct1, - nullptr, WARP_SIZE); - }); - }); + sycl_parallel_for(cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, num_groups) * block_dims, block_dims), + [=](sycl::nd_item<3> 
item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + group_norm_f32(x, dst, group_size, ne_elements, eps_ct4, item_ct1, nullptr, + WARP_SIZE); + }); + }); } else { const int work_group_size = ggml_sycl_info().max_work_group_sizes[device]; @@ -313,22 +307,18 @@ static void group_norm_f32_sycl(const float* x, float* dst, info::device::max_work_group_size. Adjust the work-group size if needed. */ - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor s_sum_acc_ct1(sycl::range<1>(work_group_size / WARP_SIZE), cgh); const float eps_ct4 = eps; - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, num_groups) * block_dims, - block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - group_norm_f32(x, dst, group_size, ne_elements, - eps_ct4, item_ct1, - get_pointer(s_sum_acc_ct1), work_group_size); - }); - }); + sycl_parallel_for(cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, num_groups) * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + group_norm_f32(x, dst, group_size, ne_elements, eps_ct4, item_ct1, + get_pointer(s_sum_acc_ct1), work_group_size); + }); + }); } } @@ -340,14 +330,13 @@ static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols, const const sycl::range<3> global_dims(nsamples, nchannels, nrows); if (ncols < 1024) { const sycl::range<3> block_dims(1, 1, WARP_SIZE); - stream->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl::nd_range<3>(global_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - rms_norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, nullptr, WARP_SIZE); - }); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(global_dims * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + 
rms_norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, + nullptr, WARP_SIZE); + }); + }); } else { const int work_group_size = ggml_sycl_info().max_work_group_sizes[device]; @@ -358,16 +347,15 @@ static void rms_norm_f32_sycl(const float* x, float* dst, const int ncols, const the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. */ - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor s_sum_acc_ct1(sycl::range<1>(work_group_size / WARP_SIZE), cgh); - cgh.parallel_for( - sycl::nd_range<3>(global_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - rms_norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, get_pointer(s_sum_acc_ct1), work_group_size); - }); - }); + sycl_parallel_for(cgh, sycl::nd_range<3>(global_dims * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + rms_norm_f32(x, dst, ncols, stride_row, stride_channel, stride_sample, eps, item_ct1, + get_pointer(s_sum_acc_ct1), work_group_size); + }); + }); } } @@ -378,16 +366,12 @@ static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols, // printf("%s ncols=%d, nrows=%d, WARP_SIZE=%d\n", __func__, ncols, nrows, WARP_SIZE); if (ncols < 1024) { const sycl::range<3> block_dims(1, 1, WARP_SIZE); - stream->submit([&](sycl::handler& cgh) { - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims, - block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - l2_norm_f32(x, dst, ncols, eps, item_ct1, - nullptr, WARP_SIZE); - }); - }); + sycl_launch(stream, [&](sycl::handler & cgh) { + sycl_parallel_for(cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + 
l2_norm_f32(x, dst, ncols, eps, item_ct1, nullptr, WARP_SIZE); + }); + }); } else { const int work_group_size = ggml_sycl_info().max_work_group_sizes[device]; @@ -398,18 +382,15 @@ static void l2_norm_f32_sycl(const float* x, float* dst, const int ncols, the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. */ - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor s_sum_acc_ct1(sycl::range<1>(work_group_size / WARP_SIZE), cgh); - cgh.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims, - block_dims), - [=](sycl::nd_item<3> item_ct1) - [[sycl::reqd_sub_group_size(WARP_SIZE)]] { - l2_norm_f32(x, dst, ncols, eps, item_ct1, - get_pointer(s_sum_acc_ct1), work_group_size); - }); - }); + sycl_parallel_for(cgh, sycl::nd_range<3>(sycl::range<3>(1, 1, nrows) * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { + l2_norm_f32(x, dst, ncols, eps, item_ct1, get_pointer(s_sum_acc_ct1), + work_group_size); + }); + }); } } diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml/src/ggml-sycl/rope.cpp index 44473e1e5580c..1b60226dcd531 100644 --- a/ggml/src/ggml-sycl/rope.cpp +++ b/ggml/src/ggml-sycl/rope.cpp @@ -47,18 +47,17 @@ static void rope_norm(const T * x, T * dst, const int ne0, const int ne1, const const int row = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); - if (i0 >= n_dims) { - const int i = row * ne0 + i0; - *reinterpret_cast *>(dst + i) = *reinterpret_cast *>(x + i); - return; - } - const int row0 = row % ne1; const int channel0 = row / ne1; const int i = row * ne0 + i0; const int i2 = channel0 * s2 + row0 * s1 + i0; + if (i0 >= n_dims) { + *reinterpret_cast *>(dst + i) = *reinterpret_cast *>(x + i2); + return; + } + const float theta_base = pos[channel0] * sycl::pow(theta_scale, i0 / 2.0f); const float freq_factor = has_ff ? 
freq_factors[i0 / 2] : 1.0f; @@ -88,18 +87,17 @@ static void rope_neox(const T * x, T * dst, const int ne0, const int ne1, const const int row = item_ct1.get_local_range(2) * item_ct1.get_group(2) + item_ct1.get_local_id(2); - if (i0 >= n_dims) { - const int i = row * ne0 + i0; - *reinterpret_cast *>(dst + i) = *reinterpret_cast *>(x + i); - return; - } - const int row0 = row % ne1; const int channel0 = row / ne1; const int i = row * ne0 + i0 / 2; const int i2 = channel0 * s2 + row0 * s1 + i0 / 2; + if (i0 >= n_dims) { + *reinterpret_cast *>(dst + i + i0 / 2) = *reinterpret_cast *>(x + i2 + i0 / 2); + return; + } + const float theta_base = pos[channel0] * sycl::pow(theta_scale, i0 / 2.0f); const float freq_factor = has_ff ? freq_factors[i0 / 2] : 1.0f; @@ -129,17 +127,16 @@ static void rope_multi(const T * x, T * dst, const int ne0, const int ne1, const } const int row_dst = (item_ct1.get_group(2) * item_ct1.get_local_range(2)) + item_ct1.get_local_id(2); - if (i0 >= n_dims) { - const int i = row_dst*ne0 + i0; - *reinterpret_cast *>(dst + i) = *reinterpret_cast *>(x + i); - return; - } - const int row_x = row_dst % ne1; const int channel_x = row_dst / ne1; const int idst = (row_dst * ne0) + (i0 / 2); const size_t ix = ((size_t) channel_x * s2) + ((size_t) row_x * s1) + (i0 / 2); + if (i0 >= n_dims) { + *reinterpret_cast *>(dst + idst + i0 / 2) = *reinterpret_cast *>(x + i0 / 2 + ix); + return; + } + const int sect_dims = sections.v[0] + sections.v[1] + sections.v[2] + sections.v[3]; const int sec_w = sections.v[1] + sections.v[0]; const int sector = (i0 / 2) % sect_dims; @@ -235,20 +232,22 @@ static void rope_norm_sycl(const T * x, T * dst, const int ne0, const int ne1, c the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. 
*/ - stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { - rope_norm(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, - theta_scale, freq_factors, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) { + rope_norm(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, + attn_factor, corr_dims, theta_scale, freq_factors, item_ct1); + }); } else { /* DPCT1049:41: The work-group size passed to the SYCL kernel may exceed the limit. To get the device limit, query info::device::max_work_group_size. Adjust the work-group size if needed. */ - stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { - rope_norm(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, - theta_scale, freq_factors, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) { + rope_norm(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, + attn_factor, corr_dims, theta_scale, freq_factors, item_ct1); + }); } } @@ -267,15 +266,17 @@ static void rope_neox_sycl(const T * x, T * dst, const int ne0, const int ne1, c dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); if (freq_factors == nullptr) { - stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { - rope_neox(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, - theta_scale, freq_factors, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) { + rope_neox(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, + attn_factor, corr_dims, theta_scale, freq_factors, item_ct1); + }); } else { - 
stream->parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { - rope_neox(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, - theta_scale, freq_factors, item_ct1); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(block_nums * block_dims, block_dims), + [=](sycl::nd_item<3> item_ct1) { + rope_neox(x, dst, ne0, ne1, s1, s2, n_dims, pos, freq_scale, ext_factor, + attn_factor, corr_dims, theta_scale, freq_factors, item_ct1); + }); } } @@ -298,12 +299,12 @@ static void rope_multi_sycl(const T * x, T * dst, const int ne0, const int ne1, } // launch kernel if (freq_factors == nullptr) { - stream->parallel_for(nd_range, [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for(stream, nd_range, [=](sycl::nd_item<3> item_ct1) { rope_multi(x, dst, ne0, ne1, ne2, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale, freq_factors, sections, item_ct1); }); } else { - stream->parallel_for(nd_range, [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for(stream, nd_range, [=](sycl::nd_item<3> item_ct1) { rope_multi(x, dst, ne0, ne1, ne2, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale, freq_factors, sections, item_ct1); }); @@ -333,12 +334,12 @@ static void rope_vision_sycl(const T * x, T * dst, const int ne0, const int ne1, } // launch kernel if (freq_factors == nullptr) { - stream->parallel_for(nd_range, [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for(stream, nd_range, [=](sycl::nd_item<3> item_ct1) { rope_vision(x, dst, ne0, ne1, ne2, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale, freq_factors, sections, item_ct1); }); } else { - stream->parallel_for(nd_range, [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for(stream, nd_range, [=](sycl::nd_item<3> item_ct1) { rope_vision(x, dst, ne0, ne1, ne2, s1, s2, n_dims, pos, freq_scale, ext_factor, attn_factor, corr_dims, theta_scale, freq_factors, 
sections, item_ct1); }); diff --git a/ggml/src/ggml-sycl/set_rows.cpp b/ggml/src/ggml-sycl/set_rows.cpp new file mode 100644 index 0000000000000..3091fab39958d --- /dev/null +++ b/ggml/src/ggml-sycl/set_rows.cpp @@ -0,0 +1,131 @@ +#include "set_rows.hpp" + +namespace utils { +template +static constexpr bool is_arithmetic_v() { + return std::is_arithmetic_v || std::is_same_v || std::is_same_v; +} +} + +template +static inline std::enable_if_t() && utils::is_arithmetic_v(), void> +convert (const char* src, char* dst) { + auto src_val = *reinterpret_cast(src); + auto dst_val = sycl::vec(src_val).template convert()[0]; + *reinterpret_cast(dst) = dst_val; +} + +template +static void k_set_rows( + const char * __restrict__ src0, const int64_t * __restrict__ src1, char * __restrict__ dst, + const int64_t ne00, const int64_t ne01, const int64_t ne02, + const int64_t ne11, const int64_t ne12, + const size_t nb01, const size_t nb02, const size_t nb03, + const size_t nb10, const size_t nb11, const size_t nb12, + const size_t nb1, const size_t nb2, const size_t nb3, + const size_t src_type_size, const size_t dst_type_size, + const int64_t total_elements, + const sycl::nd_item<1> & item_ct1) { + + const int64_t i = item_ct1.get_global_linear_id(); + if (i >= total_elements) { + return; + } + + const int64_t i03 = i / (ne00 * ne01 * ne02); + const int64_t i02 = (i - i03 * ne00 * ne01 * ne02) / (ne00 * ne01); + const int64_t i01 = (i - i03 * ne00 * ne01 * ne02 - i02 * ne00 * ne01) / ne00; + const int64_t i00 = i - i03 * ne00 * ne01 * ne02 - i02 * ne00 * ne01 - i01 * ne00; + + const int64_t i12 = i03 % ne12; + const int64_t i11 = i02 % ne11; + const int64_t i10 = i01; + + const int64_t dst_row = *(const int64_t *)((const char *)src1 + calculate_offset<3>({nb10, nb11, nb12}, {i10, i11, i12})); + + const char * src0_row = src0 + calculate_offset<3>({nb01, nb02, nb03}, {i01, i02, i03}); + const char * src_elem = src0_row + i00 * src_type_size; + char * dst_row_ptr = dst + dst_row*nb1 
+ i02*nb2 + i03*nb3; + char * dst_elem = dst_row_ptr + i00 * dst_type_size; + + convert(src_elem, dst_elem); +} + +template +static void set_rows_sycl( + const char * src0_d, const int64_t * src1_d, char * dst_d, + const int64_t ne00, const int64_t ne01, const int64_t ne02, const int64_t ne03, + const int64_t ne11, const int64_t ne12, const size_t nb01, const size_t nb02, const size_t nb03, + const size_t nb10, const size_t nb11, const size_t nb12, + const size_t nb1, const size_t nb2, const size_t nb3, + const size_t src_type_size, const size_t dst_type_size, + queue_ptr stream) { + + const int64_t total_elements = ne00 * ne01 * ne02 * ne03; + + constexpr int block_size = 64; + const int64_t grid_size = ceil_div(total_elements, block_size); + + sycl_parallel_for( + stream, + sycl::nd_range<1>(grid_size * block_size, block_size), + [=](sycl::nd_item<1> item_ct1) { + k_set_rows( + src0_d, src1_d, dst_d, + ne00, ne01, ne02, + ne11, ne12, + nb01, nb02, nb03, + nb10, nb11, nb12, + nb1, nb2, nb3, + src_type_size, dst_type_size, + total_elements, + item_ct1 + ); + } + ); +} + +void ggml_sycl_op_set_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { + scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2); + const ggml_tensor * src0 = dst->src[0]; + const ggml_tensor * src1 = dst->src[1]; + + GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32); + GGML_ASSERT(dst->src[1]->type == GGML_TYPE_I64); + + GGML_TENSOR_BINARY_OP_LOCALS + + const int64_t * src1_dd = static_cast(src1->data); + + dpct::queue_ptr stream = ctx.stream(); + switch (dst->type) { + case GGML_TYPE_F32: + set_rows_sycl( + (const char *)src0->data, src1_dd, (char *)dst->data, + ne00, ne01, ne02, ne03, + ne11, ne12, + nb01, nb02, nb03, + nb10, nb11, nb12, + nb1, nb2, nb3, + sizeof(float), sizeof(float), + stream + ); + break; + case GGML_TYPE_F16: + dpct::has_capability_or_fail(stream->get_device(), { sycl::aspect::fp16 }); + set_rows_sycl( + (const char *)src0->data, src1_dd, (char 
*)dst->data, + ne00, ne01, ne02, ne03, + ne11, ne12, + nb01, nb02, nb03, + nb10, nb11, nb12, + nb1, nb2, nb3, + sizeof(float), sizeof(sycl::half), + stream + ); + break; + default: + GGML_ABORT("Unsupported tensor type!"); + break; + } +} diff --git a/ggml/src/ggml-sycl/set_rows.hpp b/ggml/src/ggml-sycl/set_rows.hpp new file mode 100644 index 0000000000000..27fcc8f90175b --- /dev/null +++ b/ggml/src/ggml-sycl/set_rows.hpp @@ -0,0 +1,8 @@ +#ifndef GGML_SYCL_SET_ROWS_HPP +#define GGML_SYCL_SET_ROWS_HPP + +#include "common.hpp" + +void ggml_sycl_op_set_rows(ggml_backend_sycl_context & ctx, ggml_tensor * dst); + +#endif // GGML_SYCL_SET_ROWS_HPP diff --git a/ggml/src/ggml-sycl/softmax.cpp b/ggml/src/ggml-sycl/softmax.cpp index 52fcf4b3dbd24..7b60c292e0c92 100644 --- a/ggml/src/ggml-sycl/softmax.cpp +++ b/ggml/src/ggml-sycl/softmax.cpp @@ -127,11 +127,11 @@ static void soft_max_f32_submitter(const float * x, const T * mask, float * dst, const int nrows_y, const float scale, const float max_bias, const float m0, const float m1, uint32_t n_head_log2, sycl::range<3> block_nums, sycl::range<3> block_dims, const size_t n_local_scratch, queue_ptr stream) { - stream->submit([&](sycl::handler &cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor local_buf_acc(n_local_scratch, cgh); - cgh.parallel_for( - sycl::nd_range<3>(block_nums * block_dims, block_dims), + sycl_parallel_for( + cgh, sycl::nd_range<3>(block_nums * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(WARP_SIZE)]] { soft_max_f32(x, mask, dst, ncols_par, nrows_y, scale, max_bias, m0, diff --git a/ggml/src/ggml-sycl/sycl_hw.cpp b/ggml/src/ggml-sycl/sycl_hw.cpp index da121ffc261e8..7041140034b45 100644 --- a/ggml/src/ggml-sycl/sycl_hw.cpp +++ b/ggml/src/ggml-sycl/sycl_hw.cpp @@ -1,6 +1,7 @@ #include "sycl_hw.hpp" - +// TODO: currently not used +/* sycl_hw_info get_device_hw_info(sycl::device *device_ptr) { sycl_hw_info res; int32_t id = device_ptr->get_info(); 
@@ -11,3 +12,4 @@ sycl_hw_info get_device_hw_info(sycl::device *device_ptr) { return res; } +*/ diff --git a/ggml/src/ggml-sycl/sycl_hw.hpp b/ggml/src/ggml-sycl/sycl_hw.hpp index bf689450ce61f..36b140bf03737 100644 --- a/ggml/src/ggml-sycl/sycl_hw.hpp +++ b/ggml/src/ggml-sycl/sycl_hw.hpp @@ -10,6 +10,8 @@ namespace syclex = sycl::ext::oneapi::experimental; +// TODO: currently not used +/* struct sycl_hw_info { syclex::architecture arch; int32_t device_id; @@ -18,6 +20,7 @@ struct sycl_hw_info { bool is_in_vector(std::vector &vec, int item); sycl_hw_info get_device_hw_info(sycl::device *device_ptr); +*/ #endif // SYCL_HW_HPP diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml/src/ggml-sycl/tsembd.cpp index f6ca626ea7a53..721c8fa6fa27e 100644 --- a/ggml/src/ggml-sycl/tsembd.cpp +++ b/ggml/src/ggml-sycl/tsembd.cpp @@ -45,14 +45,9 @@ static void timestep_embedding_f32_sycl( int num_blocks = (half_ceil + SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE - 1) / SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE; sycl::range<3> block_dims(1, 1, SYCL_TIMESTEP_EMBEDDING_BLOCK_SIZE); sycl::range<3> gridDim(1, ne00, num_blocks); - stream->parallel_for( - sycl::nd_range<3>( - gridDim * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { - timestep_embedding_f32( - x, dst, nb1, dim, max_period, item_ct1 - ); - }); + sycl_parallel_for(stream, sycl::nd_range<3>(gridDim * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { + timestep_embedding_f32(x, dst, nb1, dim, max_period, item_ct1); + }); } void ggml_sycl_op_timestep_embedding(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { diff --git a/ggml/src/ggml-sycl/wkv.cpp b/ggml/src/ggml-sycl/wkv.cpp index c10e2f7645e89..3ed5bbf355ad9 100644 --- a/ggml/src/ggml-sycl/wkv.cpp +++ b/ggml/src/ggml-sycl/wkv.cpp @@ -207,12 +207,11 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { // Submit kernel if (C / H == WKV_BLOCK_SIZE) { - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { 
sycl::local_accessor shared_mem_acc(shared_mem_size, cgh); - cgh.parallel_for( - sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { rwkv_wkv6_f32_kernel( B, T, C, H, k_d, v_d, r_d, tf_d, td_d, s_d, dst_d, item_ct1, (float*)shared_mem_acc.get_multi_ptr().get() @@ -220,12 +219,11 @@ void ggml_sycl_op_rwkv_wkv6(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { }); }); } else { - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor shared_mem_acc(shared_mem_size, cgh); - cgh.parallel_for( - sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { rwkv_wkv6_f32_kernel( B, T, C, H, k_d, v_d, r_d, tf_d, td_d, s_d, dst_d, item_ct1, (float*)shared_mem_acc.get_multi_ptr().get() @@ -264,12 +262,11 @@ void ggml_sycl_op_rwkv_wkv7(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { // Submit kernel if (C / H == WKV_BLOCK_SIZE) { - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor shared_mem_acc(shared_mem_size, cgh); - cgh.parallel_for( - sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { rwkv_wkv7_f32_kernel( B, T, C, H, r_d, w_d, k_d, v_d, a_d, b_d, s_d, dst_d, item_ct1, (float*)shared_mem_acc.get_multi_ptr().get() @@ -277,12 +274,11 @@ void ggml_sycl_op_rwkv_wkv7(ggml_backend_sycl_context& ctx, ggml_tensor* dst) { }); }); } else { - stream->submit([&](sycl::handler& cgh) { + sycl_launch(stream, [&](sycl::handler & cgh) { sycl::local_accessor shared_mem_acc(shared_mem_size, cgh); - cgh.parallel_for( - 
sycl::nd_range<3>(grid_dims * block_dims, block_dims), - [=](sycl::nd_item<3> item_ct1) { + sycl_parallel_for( + cgh, sycl::nd_range<3>(grid_dims * block_dims, block_dims), [=](sycl::nd_item<3> item_ct1) { rwkv_wkv7_f32_kernel( B, T, C, H, r_d, w_d, k_d, v_d, a_d, b_d, s_d, dst_d, item_ct1, (float*)shared_mem_acc.get_multi_ptr().get() diff --git a/ggml/src/ggml-vulkan/CMakeLists.txt b/ggml/src/ggml-vulkan/CMakeLists.txt index 4a88415f96eae..b97e7bf995504 100644 --- a/ggml/src/ggml-vulkan/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/CMakeLists.txt @@ -49,15 +49,7 @@ if (Vulkan_FOUND) ../../include/ggml-vulkan.h ) - set(VULKAN_SHADER_GEN_CMAKE_ARGS - -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR} - -DCMAKE_RUNTIME_OUTPUT_DIRECTORY=${CMAKE_RUNTIME_OUTPUT_DIRECTORY} - ) - - set(VULKAN_SHADER_GEN_CMAKE_BUILD_ARGS "") - if (CMAKE_BUILD_TYPE AND CMAKE_BUILD_TYPE MATCHES "Debug|Release|MinSizeRel|RelWithDebInfo") - list(APPEND VULKAN_SHADER_GEN_CMAKE_BUILD_ARGS --config=${CMAKE_BUILD_TYPE}) - endif() + set(VULKAN_SHADER_GEN_CMAKE_ARGS "") # Test all shader extensions test_shader_extension_support( @@ -107,6 +99,7 @@ if (Vulkan_FOUND) if (GGML_VULKAN_SHADER_DEBUG_INFO) add_compile_definitions(GGML_VULKAN_SHADER_DEBUG_INFO) + list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DGGML_VULKAN_SHADER_DEBUG_INFO=ON) endif() if (GGML_VULKAN_VALIDATE) @@ -136,42 +129,54 @@ if (Vulkan_FOUND) set(HOST_CMAKE_TOOLCHAIN_FILE "") endif() - # Always use ExternalProject_Add approach include(ExternalProject) - # Add toolchain file if cross-compiling if (CMAKE_CROSSCOMPILING) list(APPEND VULKAN_SHADER_GEN_CMAKE_ARGS -DCMAKE_TOOLCHAIN_FILE=${HOST_CMAKE_TOOLCHAIN_FILE}) message(STATUS "vulkan-shaders-gen toolchain file: ${HOST_CMAKE_TOOLCHAIN_FILE}") endif() - # Native build through ExternalProject_Add ExternalProject_Add( vulkan-shaders-gen SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders - CMAKE_ARGS ${VULKAN_SHADER_GEN_CMAKE_ARGS} - BUILD_COMMAND ${CMAKE_COMMAND} --build . 
${VULKAN_SHADER_GEN_CMAKE_BUILD_ARGS} - INSTALL_COMMAND ${CMAKE_COMMAND} --install . - INSTALL_DIR ${CMAKE_BINARY_DIR} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/$ + -DCMAKE_INSTALL_BINDIR=. + -DCMAKE_BUILD_TYPE=$ + ${VULKAN_SHADER_GEN_CMAKE_ARGS} + + BUILD_COMMAND ${CMAKE_COMMAND} --build . --config $ + BUILD_ALWAYS TRUE + + # NOTE: When DESTDIR is set using Makefile generators and + # "make install" triggers the build step, vulkan-shaders-gen + # would be installed into the DESTDIR prefix, so it is unset + # to ensure that does not happen. + + INSTALL_COMMAND ${CMAKE_COMMAND} -E env --unset=DESTDIR + ${CMAKE_COMMAND} --install . --config $ ) - ExternalProject_Add_StepTargets(vulkan-shaders-gen build install) set (_ggml_vk_host_suffix $,.exe,>) - set (_ggml_vk_genshaders_cmd ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/vulkan-shaders-gen${_ggml_vk_host_suffix}) - set (_ggml_vk_header ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp) - set (_ggml_vk_source ${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp) - set (_ggml_vk_input_dir ${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders) - set (_ggml_vk_output_dir ${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv) + set (_ggml_vk_genshaders_dir "${CMAKE_BINARY_DIR}/$") + set (_ggml_vk_genshaders_cmd "${_ggml_vk_genshaders_dir}/vulkan-shaders-gen${_ggml_vk_host_suffix}") + set (_ggml_vk_header "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.hpp") + set (_ggml_vk_source "${CMAKE_CURRENT_BINARY_DIR}/ggml-vulkan-shaders.cpp") + set (_ggml_vk_input_dir "${CMAKE_CURRENT_SOURCE_DIR}/vulkan-shaders") + set (_ggml_vk_output_dir "${CMAKE_CURRENT_BINARY_DIR}/vulkan-shaders.spv") - file(GLOB _ggml_vk_shader_deps "${_ggml_vk_input_dir}/*.comp") - set (_ggml_vk_shader_deps ${_ggml_vk_shader_deps} vulkan-shaders-gen) + file(GLOB _ggml_vk_shader_files CONFIGURE_DEPENDS "${_ggml_vk_input_dir}/*.comp") - # Add build and install dependencies for all builds - set(_ggml_vk_shader_deps ${_ggml_vk_shader_deps} vulkan-shaders-gen-build 
vulkan-shaders-gen-install) + # Because external projects do not provide source-level tracking, + # the vulkan-shaders-gen sources need to be explicitly added to + # ensure that changes will cascade into shader re-generation. + + file(GLOB _ggml_vk_shaders_gen_sources + CONFIGURE_DEPENDS "${_ggml_vk_input_dir}/*.cpp" + "${_ggml_vk_input_dir}/*.h") add_custom_command( OUTPUT ${_ggml_vk_header} - ${_ggml_vk_source} + ${_ggml_vk_source} COMMAND ${_ggml_vk_genshaders_cmd} --glslc ${Vulkan_GLSLC_EXECUTABLE} @@ -181,7 +186,10 @@ if (Vulkan_FOUND) --target-cpp ${_ggml_vk_source} --no-clean - DEPENDS ${_ggml_vk_shader_deps} + DEPENDS ${_ggml_vk_shader_files} + ${_ggml_vk_shaders_gen_sources} + vulkan-shaders-gen + COMMENT "Generate vulkan shaders" ) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 32d6407441535..3019a545d58ed 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -168,6 +168,11 @@ struct vk_command_pool { vk_queue *q; }; +// Prevent simultaneous submissions to the same queue. +// This could be per vk_queue if we stopped having two vk_queue structures +// sharing the same vk::Queue. 
+static std::mutex queue_mutex; + struct vk_queue { uint32_t queue_family_index; vk::Queue queue; @@ -219,6 +224,21 @@ enum vk_device_architecture { INTEL_XE2, }; +// HSK x HSV +enum FaHeadSizes { + FA_HEAD_SIZE_64, + FA_HEAD_SIZE_80, + FA_HEAD_SIZE_96, + FA_HEAD_SIZE_112, + FA_HEAD_SIZE_128, + FA_HEAD_SIZE_192, + FA_HEAD_SIZE_192_128, + FA_HEAD_SIZE_256, + FA_HEAD_SIZE_576_512, + FA_HEAD_SIZE_UNSUPPORTED, + FA_HEAD_SIZE_COUNT = FA_HEAD_SIZE_UNSUPPORTED, +}; + static vk_device_architecture get_device_architecture(const vk::PhysicalDevice& device) { vk::PhysicalDeviceProperties props = device.getProperties(); @@ -300,7 +320,7 @@ static vk_device_architecture get_device_architecture(const vk::PhysicalDevice& } struct vk_device_struct { - std::mutex mutex; + std::recursive_mutex mutex; vk::PhysicalDevice physical_device; vk::PhysicalDeviceProperties properties; @@ -405,32 +425,42 @@ struct vk_device_struct { vk_pipeline pipeline_div_norepeat[2][2][2]; vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32; - vk_pipeline pipeline_upscale_f32; + vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bilinear_ac_f32; vk_pipeline pipeline_scale_f32; vk_pipeline pipeline_sqr_f32; vk_pipeline pipeline_sin_f32; vk_pipeline pipeline_cos_f32; vk_pipeline pipeline_clamp_f32; vk_pipeline pipeline_pad_f32; + vk_pipeline pipeline_roll_f32; vk_pipeline pipeline_repeat_f32, pipeline_repeat_back_f32; vk_pipeline pipeline_cpy_f32_f32, pipeline_cpy_f32_f16, pipeline_cpy_f16_f16, pipeline_cpy_f16_f32, pipeline_cpy_f32_bf16; vk_pipeline pipeline_contig_cpy_f32_f32, pipeline_contig_cpy_f32_f16, pipeline_contig_cpy_f16_f16, pipeline_contig_cpy_f16_f32, pipeline_contig_cpy_f32_bf16; vk_pipeline pipeline_cpy_f32_quant[GGML_TYPE_COUNT]; vk_pipeline pipeline_cpy_quant_f32[GGML_TYPE_COUNT]; + vk_pipeline pipeline_set_rows[GGML_TYPE_COUNT]; vk_pipeline pipeline_norm_f32; vk_pipeline pipeline_group_norm_f32; vk_pipeline pipeline_rms_norm_f32; 
+ vk_pipeline pipeline_rms_norm_mul_f32; vk_pipeline pipeline_rms_norm_back_f32; vk_pipeline pipeline_l2_norm_f32; // [src/dst 0=fp32,1=fp16] vk_pipeline pipeline_gelu[2]; + vk_pipeline pipeline_gelu_erf[2]; vk_pipeline pipeline_gelu_quick[2]; vk_pipeline pipeline_silu[2]; vk_pipeline pipeline_relu[2]; vk_pipeline pipeline_tanh[2]; vk_pipeline pipeline_sigmoid[2]; + vk_pipeline pipeline_geglu[2]; + vk_pipeline pipeline_reglu[2]; + vk_pipeline pipeline_swiglu[2]; + vk_pipeline pipeline_geglu_erf[2]; + vk_pipeline pipeline_geglu_quick[2]; + vk_pipeline pipeline_leaky_relu_f32; vk_pipeline pipeline_silu_back_f32; vk_pipeline pipeline_diag_mask_inf_f32; @@ -456,26 +486,11 @@ struct vk_device_struct { vk_pipeline pipeline_conv2d_dw_cwhn_f32; // [2][2][2] is for {f16acc,f32acc}x{large,small_rows}x{unaligned, aligned} - vk_pipeline pipeline_flash_attn_f32_f16_D64_cm2[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D80_cm2[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D96_cm2[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D112_cm2[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D128_cm2[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D256_cm2[GGML_TYPE_COUNT][2][2][2]; - - vk_pipeline pipeline_flash_attn_f32_f16_D64_cm1[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D80_cm1[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D96_cm1[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D112_cm1[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D128_cm1[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D256_cm1[GGML_TYPE_COUNT][2][2][2]; - - vk_pipeline pipeline_flash_attn_f32_f16_D64[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D80[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D96[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline 
pipeline_flash_attn_f32_f16_D112[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D128[GGML_TYPE_COUNT][2][2][2]; - vk_pipeline pipeline_flash_attn_f32_f16_D256[GGML_TYPE_COUNT][2][2][2]; + vk_pipeline pipeline_flash_attn_f32_f16_cm2[GGML_TYPE_COUNT][FA_HEAD_SIZE_COUNT][2][2][2]; + + vk_pipeline pipeline_flash_attn_f32_f16_cm1[GGML_TYPE_COUNT][FA_HEAD_SIZE_COUNT][2][2][2]; + + vk_pipeline pipeline_flash_attn_f32_f16[GGML_TYPE_COUNT][FA_HEAD_SIZE_COUNT][2][2][2]; vk_pipeline pipeline_flash_attn_split_k_reduce; @@ -488,6 +503,8 @@ struct vk_device_struct { ggml_backend_buffer_type buffer_type; + bool disable_fusion; + #ifdef GGML_VULKAN_MEMORY_DEBUG std::unique_ptr memory_logger; #endif @@ -622,6 +639,8 @@ struct vk_flash_attn_push_constants { uint32_t nev2; uint32_t nev3; uint32_t nem1; + uint32_t nem2; + uint32_t nem3; uint32_t nb01; uint32_t nb02; @@ -632,14 +651,12 @@ struct vk_flash_attn_push_constants { uint32_t nb21; uint32_t nb22; uint32_t nb23; - uint32_t nb31; float scale; float max_bias; float logit_softcap; - uint32_t mask; - uint32_t n_head_log2; + uint32_t mask_n_head_log2; float m0; float m1; @@ -647,6 +664,7 @@ struct vk_flash_attn_push_constants { uint32_t split_kv; uint32_t k_num; }; +static_assert(sizeof(vk_flash_attn_push_constants) <= 128, "sizeof(vk_flash_attn_push_constants) must be <= 128"); struct vk_op_push_constants { uint32_t KX; @@ -655,6 +673,13 @@ struct vk_op_push_constants { float param2; }; +struct vk_op_glu_push_constants { + uint32_t N; + uint32_t ne00; + uint32_t ne20; + uint32_t mode; // 0: default, 1: swapped, 2: split +}; + struct vk_op_unary_push_constants { uint32_t ne; uint32_t ne00; uint32_t ne01; uint32_t ne02; uint32_t ne03; uint32_t nb00; uint32_t nb01; uint32_t nb02; uint32_t nb03; @@ -670,6 +695,37 @@ struct vk_op_unary_push_constants { }; static_assert(sizeof(vk_op_unary_push_constants) <= 128, "sizeof(vk_op_unary_push_constants) must be <= 128"); +static vk_op_unary_push_constants 
vk_op_unary_push_constants_init(const ggml_tensor * src0, const ggml_tensor * dst, int64_t ne = 0) { + GGML_ASSERT(ne != 0 || (ggml_nelements(src0) == ggml_nelements(dst))); + ne = ne != 0 ? ne : ggml_nelements(dst); + GGML_ASSERT(ne <= (int64_t)std::numeric_limits::max()); + + vk_op_unary_push_constants p{}; + p.ne = (uint32_t)ne; + + size_t src0_tsize = ggml_type_size(src0->type); + p.ne00 = (uint32_t)src0->ne[0]; + p.ne01 = (uint32_t)src0->ne[1]; + p.ne02 = (uint32_t)src0->ne[2]; + p.ne03 = (uint32_t)src0->ne[3]; + p.nb00 = (uint32_t)(src0->nb[0] / src0_tsize); + p.nb01 = (uint32_t)(src0->nb[1] / src0_tsize); + p.nb02 = (uint32_t)(src0->nb[2] / src0_tsize); + p.nb03 = (uint32_t)(src0->nb[3] / src0_tsize); + + size_t dst_tsize = ggml_type_size(dst->type); + p.ne10 = (uint32_t)dst->ne[0]; + p.ne11 = (uint32_t)dst->ne[1]; + p.ne12 = (uint32_t)dst->ne[2]; + p.ne13 = (uint32_t)dst->ne[3]; + p.nb10 = (uint32_t)(dst->nb[0] / dst_tsize); + p.nb11 = (uint32_t)(dst->nb[1] / dst_tsize); + p.nb12 = (uint32_t)(dst->nb[2] / dst_tsize); + p.nb13 = (uint32_t)(dst->nb[3] / dst_tsize); + + return p; // fastdiv values and offsets are initialized later in ggml_vk_op +} + // See https://gmplib.org/~tege/divcnst-pldi94.pdf figure 4.1. 
// Precompute mp (m' in the paper) and L such that division // can be computed using a multiply (high 32b of 64b result) @@ -738,6 +794,14 @@ struct vk_op_rope_push_constants { struct vk_op_soft_max_push_constants { uint32_t KX; uint32_t KY; + uint32_t ne00; + uint32_t ne01; + uint32_t ne02; + uint32_t ne12; + uint32_t ne13; + uint32_t nb11; + uint32_t nb12; + uint32_t nb13; float scale; float max_bias; float m0; @@ -831,6 +895,7 @@ struct vk_op_conv2d_dw_push_constants { struct vk_op_upscale_push_constants { uint32_t ne; uint32_t a_offset; uint32_t d_offset; + uint32_t ne00; uint32_t ne01; uint32_t nb00; uint32_t nb01; uint32_t nb02; uint32_t nb03; uint32_t ne10; uint32_t ne11; uint32_t ne12; uint32_t ne13; float sf0; float sf1; float sf2; float sf3; @@ -973,6 +1038,10 @@ struct ggml_backend_vk_context { vk_command_pool compute_cmd_pool; vk_command_pool transfer_cmd_pool; + + // number of additional consecutive nodes that are being fused with the + // node currently being processed + int num_additional_fused_ops {}; }; static void * const vk_ptr_base = (void *)(uintptr_t) 0x1000; // NOLINT @@ -1036,6 +1105,14 @@ void vk_memory_logger::log_deallocation(vk_buffer_ref buf_ref) { struct vk_instance_t { vk::Instance instance; + bool debug_utils_support = false; // VK_EXT_debug_utils enabled + PFN_vkSetDebugUtilsObjectNameEXT pfn_vkSetDebugUtilsObjectNameEXT = {}; + PFN_vkQueueBeginDebugUtilsLabelEXT pfn_vkQueueBeginDebugUtilsLabelEXT = {}; + PFN_vkQueueEndDebugUtilsLabelEXT pfn_vkQueueEndDebugUtilsLabelEXT = {}; + PFN_vkCmdBeginDebugUtilsLabelEXT pfn_vkCmdBeginDebugUtilsLabelEXT = {}; + PFN_vkCmdEndDebugUtilsLabelEXT pfn_vkCmdEndDebugUtilsLabelEXT = {}; + PFN_vkCmdInsertDebugUtilsLabelEXT pfn_vkCmdInsertDebugUtilsLabelEXT = {}; + std::vector device_indices; vk_device devices[GGML_VK_MAX_DEVICES]; }; @@ -1050,8 +1127,8 @@ static size_t vk_skip_checks; static size_t vk_output_tensor; static void ggml_vk_print_tensor(const ggml_tensor * tensor, const char * name); -static 
void ggml_vk_check_results_0(ggml_tensor * tensor); -static void ggml_vk_check_results_1(ggml_tensor * tensor); +static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph * cgraph, int tensor_idx); +static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_cgraph * cgraph, int tensor_idx); #endif typedef void (*ggml_vk_func_t)(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst); @@ -1175,8 +1252,16 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin } pipeline->compiled = true; + if (vk_instance.debug_utils_support) { + vk::DebugUtilsObjectNameInfoEXT duoni; + duoni.objectType = vk::ObjectType::ePipeline; + duoni.pObjectName = pipeline->name.c_str(); + duoni.objectHandle = reinterpret_cast(static_cast(pipeline->pipeline)); + vk_instance.pfn_vkSetDebugUtilsObjectNameEXT(device->device, &static_cast(duoni)); + } + { - std::lock_guard guard(device->mutex); + std::lock_guard guard(device->mutex); device->pipelines.insert({ pipeline->name, pipeline }); } @@ -1266,6 +1351,7 @@ static vk::CommandBuffer ggml_vk_create_cmd_buffer(vk_device& device, vk_command static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) { if (ctx->seqs.empty()) { if (fence) { + std::lock_guard guard(queue_mutex); ctx->p->q->queue.submit({}, fence); } return; @@ -1335,6 +1421,7 @@ static void ggml_vk_submit(vk_context& ctx, vk::Fence fence) { } } + std::lock_guard guard(queue_mutex); ctx->p->q->queue.submit(submit_infos, fence); ctx->seqs.clear(); @@ -1388,7 +1475,7 @@ static uint32_t ggml_vk_find_queue_family_index(std::vector guard(device->mutex); + std::lock_guard guard(device->mutex); q.queue_family_index = queue_family_index; q.transfer_only = transfer_only; @@ -1650,10 +1737,46 @@ enum FaCodePath { FA_COOPMAT2, }; +static FaHeadSizes fa_get_head_sizes(uint32_t hsk, uint32_t hsv) { + if (hsk != 192 && hsk != 576 && hsk != hsv) { + return 
FA_HEAD_SIZE_UNSUPPORTED; + } + switch (hsk) { + case 64: return FA_HEAD_SIZE_64; + case 80: return FA_HEAD_SIZE_80; + case 96: return FA_HEAD_SIZE_96; + case 112: return FA_HEAD_SIZE_112; + case 128: return FA_HEAD_SIZE_128; + case 192: + if (hsv == 192) { + return FA_HEAD_SIZE_192; + } else if (hsv == 128) { + return FA_HEAD_SIZE_192_128; + } else { + return FA_HEAD_SIZE_UNSUPPORTED; + } + case 256: return FA_HEAD_SIZE_256; + case 576: + if (hsv == 512) { + return FA_HEAD_SIZE_576_512; + } else { + return FA_HEAD_SIZE_UNSUPPORTED; + } + default: return FA_HEAD_SIZE_UNSUPPORTED; + } +} + // number of rows/cols for flash attention shader static constexpr uint32_t flash_attention_num_small_rows = 32; static constexpr uint32_t scalar_flash_attention_num_small_rows = 1; -static constexpr uint32_t scalar_flash_attention_num_large_rows = 8; + +static uint32_t get_fa_scalar_num_large_rows(uint32_t hsv) { + if (hsv >= 512) { + return 2; + } else { + return 8; + } +} // The FA coopmat1 shader assumes 16x16x16 matrix multiply support. 
// 128 threads split into four subgroups, each subgroup does 1/4 @@ -1670,14 +1793,15 @@ static uint32_t get_fa_num_small_rows(FaCodePath path) { } } -static std::array fa_rows_cols(FaCodePath path, uint32_t D, uint32_t clamp, ggml_type type, bool small_rows) { +static std::array fa_rows_cols(FaCodePath path, uint32_t hsk, uint32_t hsv, uint32_t clamp, ggml_type type, bool small_rows) { GGML_UNUSED(clamp); + GGML_UNUSED(hsv); if (path == FA_SCALAR) { if (small_rows) { return {scalar_flash_attention_num_small_rows, 64}; } else { - return {scalar_flash_attention_num_large_rows, 32}; + return {get_fa_scalar_num_large_rows(hsv), 32}; } } @@ -1695,8 +1819,12 @@ static std::array fa_rows_cols(FaCodePath path, uint32_t D, uint32_ } // small cols to reduce register count - if (ggml_is_quantized(type) || D == 256) { - return {64, 32}; + if (ggml_is_quantized(type) || hsk >= 256) { + if (hsk >= 512) { + return {32, 32}; + } else { + return {64, 32}; + } } return {64, 64}; } @@ -1738,7 +1866,7 @@ static bool ggml_vk_matmul_shmem_support(const vk_device& device, const std::vec const uint32_t warps = warptile[0] / warptile[10]; const uint32_t load_bufs = (warptile[1] + warptile[2]) * (warptile[3] + bank_conflict_offset) * type_size; - const uint32_t mmid_row_ids = mul_mat_id ? 4096 * sizeof(uint32_t) : 0; + const uint32_t mmid_row_ids = mul_mat_id ? (4096 * sizeof(uint32_t) + 4/*_ne1*/) : 0; const uint32_t coopmat_stage = device->coopmat_support ? 
warptile[7] * warptile[8] / warps * sizeof(float) : 0; const uint32_t total_size = load_bufs + mmid_row_ids + coopmat_stage + lut_size; @@ -1863,10 +1991,10 @@ static void ggml_vk_load_shaders(vk_device& device) { s_mmq_wg_denoms_k = { 32, 32, 1 }; // spec constants and tile sizes for quant matmul_id - l_warptile_mmqid = { 256, 128, 64, 16, 0 }; + l_warptile_mmqid = { 256, 128, 128, 16, 0 }; m_warptile_mmqid = { 256, 128, 64, 16, 0 }; s_warptile_mmqid = { 256, 128, 64, 16, 0 }; - l_mmqid_wg_denoms = { 128, 64, 1 }; + l_mmqid_wg_denoms = { 128, 128, 1 }; m_mmqid_wg_denoms = { 128, 64, 1 }; s_mmqid_wg_denoms = { 128, 64, 1 }; @@ -1988,19 +2116,21 @@ static void ggml_vk_load_shaders(vk_device& device) { parameter_count, wg_denoms, specialization_constants, disable_robustness, require_full_subgroups, required_subgroup_size)); }; - auto const &fa_wg_denoms = [&](FaCodePath path, uint32_t D, uint32_t clamp, ggml_type type, bool small_rows) -> std::array { - return {fa_rows_cols(path, D, clamp, type, small_rows)[0], 1, 1}; + auto const &fa_wg_denoms = [&](FaCodePath path, uint32_t hsk, uint32_t hsv, uint32_t clamp, ggml_type type, bool small_rows) -> std::array { + return {fa_rows_cols(path, hsk, hsv, clamp, type, small_rows)[0], 1, 1}; }; - auto const &fa_spec_constants = [&](FaCodePath path, uint32_t D, uint32_t clamp, ggml_type type, bool small_rows) -> std::vector { + auto const &fa_spec_constants = [&](FaCodePath path, uint32_t hsk, uint32_t hsv, uint32_t clamp, ggml_type type, bool small_rows) -> std::vector { // For large number of rows, 128 invocations seems to work best. // For small number of rows (e.g. N==1), 256 works better. But matrix granularity for 256 is 32, so we // can't use 256 for D==80. // For scalar, use 128 (arbitrary) + // The same D_split value is used for both HSK and HSV, so just base it on the union of the LSBs. + const uint32_t D = (hsk|hsv); uint32_t wg_size = (path == FA_SCALAR || path == FA_COOPMAT1) ? 
scalar_flash_attention_workgroup_size : ((small_rows && (D % 32) == 0) ? 256 : 128); - auto rows_cols = fa_rows_cols(path, D, clamp, type, small_rows); + auto rows_cols = fa_rows_cols(path, hsk, hsv, clamp, type, small_rows); // D_split can't be larger than a subgroup because we use subgroupShuffle to reduce it. // D_split can't be larger than the LSB of D divided by 4 due to vectorization in the shader. @@ -2009,26 +2139,29 @@ static void ggml_vk_load_shaders(vk_device& device) { // mask dim1 is padded to 64, we rely on this to avoid clamping mask loads GGML_ASSERT((GGML_KQ_MASK_PAD % rows_cols[0]) == 0); - return {wg_size, rows_cols[0], rows_cols[1], (D), clamp, D_split}; + return {wg_size, rows_cols[0], rows_cols[1], hsk, hsv, clamp, D_split}; }; -#define CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, D) \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][0][0][0], "flash_attn_f32_f16_D" #D "_f16acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,1,TYPE,false), fa_spec_constants(FAPATH, D,1,TYPE,false), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][0][0][1], "flash_attn_f32_f16_D" #D "_aligned_f16acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,0,TYPE,false), fa_spec_constants(FAPATH, D,0,TYPE,false), fa_rows_cols(FAPATH,D,0,TYPE,false)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 
32 : 0)); \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][1][0][0], "flash_attn_f32_f16_D" #D "_f32acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,1,TYPE,false), fa_spec_constants(FAPATH, D,1,TYPE,false), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][1][0][1], "flash_attn_f32_f16_D" #D "_aligned_f32acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,0,TYPE,false), fa_spec_constants(FAPATH, D,0,TYPE,false), fa_rows_cols(FAPATH,D,0,TYPE,false)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][0][1][0], "flash_attn_f32_f16_D" #D "_f16acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,1,TYPE,true), fa_spec_constants(FAPATH, D,1,TYPE,true), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][0][1][1], "flash_attn_f32_f16_D" #D "_aligned_f16acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,0,TYPE,true), fa_spec_constants(FAPATH, D,0,TYPE,true), fa_rows_cols(FAPATH,D,0,TYPE,true)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 
32 : 0)); \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][1][1][0], "flash_attn_f32_f16_D" #D "_f32acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,1,TYPE,true), fa_spec_constants(FAPATH, D,1,TYPE,true), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16_D ## D ## SUFFIX[TYPE][1][1][1], "flash_attn_f32_f16_D" #D "_aligned_f32acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, D,0,TYPE,true), fa_spec_constants(FAPATH, D,0,TYPE,true), fa_rows_cols(FAPATH,D,0,TYPE,true)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ +#define CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, HSK, HSV, HEAD_SIZES) \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][0][0][0], "flash_attn_f32_f16_" #HEAD_SIZES "_f16acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,1,TYPE,false), fa_spec_constants(FAPATH, HSK,HSV,1,TYPE,false), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 
32 : 0)); \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][0][0][1], "flash_attn_f32_f16_" #HEAD_SIZES "_aligned_f16acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,0,TYPE,false), fa_spec_constants(FAPATH, HSK,HSV,0,TYPE,false), fa_rows_cols(FAPATH,HSK,HSV,0,TYPE,false)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][1][0][0], "flash_attn_f32_f16_" #HEAD_SIZES "_f32acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,1,TYPE,false), fa_spec_constants(FAPATH, HSK,HSV,1,TYPE,false), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][1][0][1], "flash_attn_f32_f16_" #HEAD_SIZES "_aligned_f32acc" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,0,TYPE,false), fa_spec_constants(FAPATH, HSK,HSV,0,TYPE,false), fa_rows_cols(FAPATH,HSK,HSV,0,TYPE,false)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 
32 : 0)); \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][0][1][0], "flash_attn_f32_f16_" #HEAD_SIZES "_f16acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,1,TYPE,true), fa_spec_constants(FAPATH, HSK,HSV,1,TYPE,true), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][0][1][1], "flash_attn_f32_f16_" #HEAD_SIZES "_aligned_f16acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## _f16acc ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,0,TYPE,true), fa_spec_constants(FAPATH, HSK,HSV,0,TYPE,true), fa_rows_cols(FAPATH,HSK,HSV,0,TYPE,true)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][1][1][0], "flash_attn_f32_f16_" #HEAD_SIZES "_f32acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,1,TYPE,true), fa_spec_constants(FAPATH, HSK,HSV,1,TYPE,true), 1, true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 
32 : 0)); \ + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_f32_f16 ## SUFFIX[TYPE][FA_HEAD_SIZE_##HEAD_SIZES][1][1][1], "flash_attn_f32_f16_" #HEAD_SIZES "_aligned_f32acc_smallrows" #NAMELC #SUFFIX, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _len, flash_attn_f32_f16_ ## NAMELC ## SUFFIX ## _data, "main", 5, sizeof(vk_flash_attn_push_constants), fa_wg_denoms(FAPATH, HSK,HSV,0,TYPE,true), fa_spec_constants(FAPATH, HSK,HSV,0,TYPE,true), fa_rows_cols(FAPATH,HSK,HSV,0,TYPE,true)[1], true, FAPATH==FA_COOPMAT1, (FAPATH==FA_COOPMAT1 ? 32 : 0)); \ #define CREATE_FA(TYPE, NAMELC, FAPATH, SUFFIX) \ - CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 64) \ - CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 80) \ - CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 96) \ - CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 112) \ - CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 128) \ - CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 256) + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 64, 64, 64) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 80, 80, 80) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 96, 96, 96) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 112, 112, 112) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 128, 128, 128) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 192, 192, 192) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 192, 128, 192_128) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 256, 256, 256) \ + CREATE_FA2(TYPE, NAMELC, FAPATH, SUFFIX, 576, 512, 576_512) CREATE_FA(GGML_TYPE_F16, f16, FA_SCALAR, ) CREATE_FA(GGML_TYPE_Q4_0, q4_0, FA_SCALAR, ) @@ -2618,7 +2751,7 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ4_NL], "get_rows_iq4_nl_f32", get_rows_iq4_nl_f32_len, get_rows_iq4_nl_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); ggml_vk_create_pipeline(device, device->pipeline_matmul_split_k_reduce, "split_k_reduce", split_k_reduce_len, split_k_reduce_data, "main", 2, 2 * sizeof(uint32_t), {256 
* 4, 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_flash_attn_split_k_reduce, "fa_split_k_reduce", fa_split_k_reduce_len, fa_split_k_reduce_data, "main", 2, 3 * sizeof(uint32_t), {1, 1, 1}, {}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_flash_attn_split_k_reduce, "fa_split_k_reduce", fa_split_k_reduce_len, fa_split_k_reduce_data, "main", 2, 4 * sizeof(uint32_t), {1, device->subgroup_size, 1}, {device->subgroup_size}, 1, true); ggml_vk_create_pipeline(device, device->pipeline_quantize_q8_1, "quantize_q8_1", quantize_q8_1_len, quantize_q8_1_data, "main", 2, 1 * sizeof(uint32_t), {32 * device->subgroup_size / 8, 1, 1}, { device->subgroup_size }, 1); for (uint32_t i = 0; i < p021_max_gqa_ratio; ++i) { @@ -2632,7 +2765,8 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_norm_f32, "norm_f32", norm_f32_len, norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); ggml_vk_create_pipeline(device, device->pipeline_group_norm_f32, "group_norm_f32", group_norm_f32_len, group_norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_rms_norm_f32, "rms_norm_f32", rms_norm_f32_len, rms_norm_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {1, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_rms_norm_f32, "rms_norm_f32", rms_norm_f32_len, rms_norm_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {0, 0}, 1); + ggml_vk_create_pipeline(device, device->pipeline_rms_norm_mul_f32, "rms_norm_mul_f32", rms_norm_f32_len, rms_norm_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {0, 1}, 1); ggml_vk_create_pipeline(device, device->pipeline_rms_norm_back_f32, "rms_norm_back_f32", rms_norm_back_f32_len, rms_norm_back_f32_data, "main", 3, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); ggml_vk_create_pipeline(device, device->pipeline_l2_norm_f32, 
"l2_norm_f32", l2_norm_f32_len, l2_norm_f32_data, "main", 2, sizeof(vk_op_push_constants), {1, 1, 1}, {}, 1); @@ -2649,19 +2783,41 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_contig_cpy_f32_bf16,"contig_cpy_f32_bf16",contig_cpy_f32_bf16_len,contig_cpy_f32_bf16_data,"main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); if (device->float_controls_rte_fp16) { - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_0], "cpy_f32_q4_0", cpy_f32_q4_0_rte_len, cpy_f32_q4_0_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q4_0), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_1], "cpy_f32_q4_1", cpy_f32_q4_1_rte_len, cpy_f32_q4_1_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q4_1), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_0], "cpy_f32_q5_0", cpy_f32_q5_0_rte_len, cpy_f32_q5_0_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q5_0), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_1], "cpy_f32_q5_1", cpy_f32_q5_1_rte_len, cpy_f32_q5_1_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q5_1), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q8_0], "cpy_f32_q8_0", cpy_f32_q8_0_rte_len, cpy_f32_q8_0_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q8_0), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_IQ4_NL], "cpy_f32_iq4_nl", cpy_f32_iq4_nl_rte_len, cpy_f32_iq4_nl_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_IQ4_NL), 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, 
device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_0], "cpy_f32_q4_0", cpy_f32_q4_0_rte_len, cpy_f32_q4_0_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_1], "cpy_f32_q4_1", cpy_f32_q4_1_rte_len, cpy_f32_q4_1_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_0], "cpy_f32_q5_0", cpy_f32_q5_0_rte_len, cpy_f32_q5_0_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_1], "cpy_f32_q5_1", cpy_f32_q5_1_rte_len, cpy_f32_q5_1_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q8_0], "cpy_f32_q8_0", cpy_f32_q8_0_rte_len, cpy_f32_q8_0_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_IQ4_NL], "cpy_f32_iq4_nl", cpy_f32_iq4_nl_rte_len, cpy_f32_iq4_nl_rte_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + } else { + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_0], "cpy_f32_q4_0", cpy_f32_q4_0_len, cpy_f32_q4_0_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_1], "cpy_f32_q4_1", cpy_f32_q4_1_len, cpy_f32_q4_1_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_0], "cpy_f32_q5_0", cpy_f32_q5_0_len, cpy_f32_q5_0_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_1], "cpy_f32_q5_1", cpy_f32_q5_1_len, cpy_f32_q5_1_data, "main", 2, 
sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q8_0], "cpy_f32_q8_0", cpy_f32_q8_0_len, cpy_f32_q8_0_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_IQ4_NL], "cpy_f32_iq4_nl", cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, "main", 2, sizeof(vk_op_unary_push_constants), {32, 1, 1}, {}, 1); + } + + if (device->float_controls_rte_fp16) { + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_F32], "set_rows_f32", set_rows_f32_rte_len, set_rows_f32_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_F16], "set_rows_f16", set_rows_f16_rte_len, set_rows_f16_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_BF16], "set_rows_bf16", set_rows_bf16_rte_len, set_rows_bf16_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q4_0], "set_rows_q4_0", set_rows_q4_0_rte_len, set_rows_q4_0_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q4_1], "set_rows_q4_1", set_rows_q4_1_rte_len, set_rows_q4_1_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q5_0], "set_rows_q5_0", set_rows_q5_0_rte_len, set_rows_q5_0_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q5_1], "set_rows_q5_1", set_rows_q5_1_rte_len, set_rows_q5_1_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, 
{1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q8_0], "set_rows_q8_0", set_rows_q8_0_rte_len, set_rows_q8_0_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_IQ4_NL], "set_rows_iq4_nl", set_rows_iq4_nl_rte_len, set_rows_iq4_nl_rte_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); } else { - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_0], "cpy_f32_q4_0", cpy_f32_q4_0_len, cpy_f32_q4_0_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q4_0), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q4_1], "cpy_f32_q4_1", cpy_f32_q4_1_len, cpy_f32_q4_1_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q4_1), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_0], "cpy_f32_q5_0", cpy_f32_q5_0_len, cpy_f32_q5_0_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q5_0), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q5_1], "cpy_f32_q5_1", cpy_f32_q5_1_len, cpy_f32_q5_1_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q5_1), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_Q8_0], "cpy_f32_q8_0", cpy_f32_q8_0_len, cpy_f32_q8_0_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_Q8_0), 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_cpy_f32_quant[GGML_TYPE_IQ4_NL], "cpy_f32_iq4_nl", cpy_f32_iq4_nl_len, cpy_f32_iq4_nl_data, "main", 2, sizeof(vk_op_unary_push_constants), {(uint32_t)ggml_blck_size(GGML_TYPE_IQ4_NL), 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_F32], 
"set_rows_f32", set_rows_f32_len, set_rows_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_F16], "set_rows_f16", set_rows_f16_len, set_rows_f16_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_BF16], "set_rows_bf16", set_rows_bf16_len, set_rows_bf16_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q4_0], "set_rows_q4_0", set_rows_q4_0_len, set_rows_q4_0_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q4_1], "set_rows_q4_1", set_rows_q4_1_len, set_rows_q4_1_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q5_0], "set_rows_q5_0", set_rows_q5_0_len, set_rows_q5_0_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q5_1], "set_rows_q5_1", set_rows_q5_1_len, set_rows_q5_1_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_Q8_0], "set_rows_q8_0", set_rows_q8_0_len, set_rows_q8_0_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); + ggml_vk_create_pipeline(device, device->pipeline_set_rows[GGML_TYPE_IQ4_NL], "set_rows_iq4_nl", set_rows_iq4_nl_len, set_rows_iq4_nl_data, "main", 3, sizeof(vk_op_binary_push_constants), {1, 1, 1}, {1}, 1, true); } ggml_vk_create_pipeline(device, device->pipeline_cpy_quant_f32[GGML_TYPE_Q4_0], "cpy_q4_0_f32", cpy_q4_0_f32_len, cpy_q4_0_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), 
{(uint32_t)ggml_blck_size(GGML_TYPE_Q4_0), 1, 1}, {}, 1); @@ -2679,10 +2835,11 @@ static void ggml_vk_load_shaders(vk_device& device) { return s; }; + bool rte = device->float_controls_rte_fp16; #define CREATE_BINARY(name, namemod, spec) \ for (int s0 : {0,1}) for (int s1 : {0,1}) for (int d : {0,1}) \ ggml_vk_create_pipeline(device, device->pipeline_ ## name ## namemod[s0][s1][d], \ - #name + get_suffix(s0, s1, d) + #namemod, name ## _len[s0][s1][d], name ## _data[s0][s1][d], \ + #name + get_suffix(s0, s1, d) + #namemod, name ## _len[s0][s1][d][rte], name ## _data[s0][s1][d][rte], \ "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, spec, 1); CREATE_BINARY(add, , {0}) @@ -2701,7 +2858,9 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_concat_f16, "concat_f16", concat_f16_len, concat_f16_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1); ggml_vk_create_pipeline(device, device->pipeline_concat_i32, "concat_i32", concat_i32_len, concat_i32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1); - ggml_vk_create_pipeline(device, device->pipeline_upscale_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1); + ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1); + ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_ac_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS}, 1); 
ggml_vk_create_pipeline(device, device->pipeline_scale_f32, "scale_f32", scale_f32_len, scale_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); @@ -2713,6 +2872,8 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_pad_f32, "pad_f32", pad_f32_len, pad_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_roll_f32, "roll_f32", roll_f32_len, roll_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); + ggml_vk_create_pipeline(device, device->pipeline_repeat_f32, "repeat_f32", repeat_f32_len, repeat_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); ggml_vk_create_pipeline(device, device->pipeline_repeat_back_f32, "repeat_back_f32", repeat_back_f32_len, repeat_back_f32_data, "main", 2, sizeof(vk_op_unary_push_constants), {512, 1, 1}, {}, 1); @@ -2721,6 +2882,7 @@ static void ggml_vk_load_shaders(vk_device& device) { ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); CREATE_UNARY(gelu) + CREATE_UNARY(gelu_erf) CREATE_UNARY(gelu_quick) CREATE_UNARY(silu) CREATE_UNARY(relu) @@ -2728,6 +2890,22 @@ static void ggml_vk_load_shaders(vk_device& device) { CREATE_UNARY(sigmoid) #undef CREATE_UNARY +#define CREATE_GLU(name) \ + if (device->float_controls_rte_fp16) { \ + ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32_rte", name ## _f32_rte_len, name ## _f32_rte_data, "main", 3, sizeof(vk_op_glu_push_constants), {512, 1, 1}, {}, 1, true); \ + ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16_rte", name ## _f16_rte_len, name ## _f16_rte_data, "main", 3, sizeof(vk_op_glu_push_constants), {512, 1, 1}, {}, 1, true); \ + } else { \ + ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name 
"_f32", name ## _f32_len, name ## _f32_data, "main", 3, sizeof(vk_op_glu_push_constants), {512, 1, 1}, {}, 1, true); \ + ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 3, sizeof(vk_op_glu_push_constants), {512, 1, 1}, {}, 1, true); \ + } + + CREATE_GLU(geglu) + CREATE_GLU(reglu) + CREATE_GLU(swiglu) + CREATE_GLU(geglu_erf) + CREATE_GLU(geglu_quick) +#undef CREATE_GLU + ggml_vk_create_pipeline(device, device->pipeline_leaky_relu_f32, "leaky_relu_f32", leaky_relu_f32_len, leaky_relu_f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); ggml_vk_create_pipeline(device, device->pipeline_silu_back_f32, "silu_back_f32", silu_back_f32_len, silu_back_f32_data, "main", 3, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); @@ -3408,6 +3586,8 @@ static vk_device ggml_vk_get_device(size_t idx) { device->idx = idx; + device->disable_fusion = getenv("GGML_VK_DISABLE_FUSION") != nullptr; + return device; } @@ -3554,6 +3734,8 @@ static void ggml_vk_print_gpu_info(size_t idx) { static bool ggml_vk_instance_validation_ext_available(const std::vector& instance_extensions); static bool ggml_vk_instance_portability_enumeration_ext_available(const std::vector& instance_extensions); +static bool ggml_vk_instance_debug_utils_ext_available(const std::vector & instance_extensions); + static void ggml_vk_instance_init() { if (vk_instance_initialized) { return; @@ -3574,7 +3756,7 @@ static void ggml_vk_instance_init() { #ifdef __APPLE__ const bool portability_enumeration_ext = ggml_vk_instance_portability_enumeration_ext_available(instance_extensions); #endif - + const bool debug_utils_ext = ggml_vk_instance_debug_utils_ext_available(instance_extensions) && getenv("GGML_VK_DEBUG_MARKERS") != nullptr; std::vector layers; if (validation_ext) { @@ -3589,6 +3771,9 @@ static void ggml_vk_instance_init() { extensions.push_back("VK_KHR_portability_enumeration"); } #endif + if (debug_utils_ext) { + 
extensions.push_back("VK_EXT_debug_utils"); + } vk::InstanceCreateInfo instance_create_info(vk::InstanceCreateFlags{}, &app_info, layers, extensions); #ifdef __APPLE__ if (portability_enumeration_ext) { @@ -3612,6 +3797,17 @@ static void ggml_vk_instance_init() { vk_instance.instance = vk::createInstance(instance_create_info); vk_instance_initialized = true; + if (debug_utils_ext) { + vk_instance.debug_utils_support = true; + vk_instance.pfn_vkSetDebugUtilsObjectNameEXT = (PFN_vkSetDebugUtilsObjectNameEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkSetDebugUtilsObjectNameEXT"); + vk_instance.pfn_vkQueueBeginDebugUtilsLabelEXT = (PFN_vkQueueBeginDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkQueueBeginDebugUtilsLabelEXT"); + vk_instance.pfn_vkQueueEndDebugUtilsLabelEXT = (PFN_vkQueueEndDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkQueueEndDebugUtilsLabelEXT"); + vk_instance.pfn_vkCmdBeginDebugUtilsLabelEXT = (PFN_vkCmdBeginDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdBeginDebugUtilsLabelEXT"); + vk_instance.pfn_vkCmdEndDebugUtilsLabelEXT = (PFN_vkCmdEndDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdEndDebugUtilsLabelEXT"); + vk_instance.pfn_vkCmdInsertDebugUtilsLabelEXT = (PFN_vkCmdInsertDebugUtilsLabelEXT) vkGetInstanceProcAddr(vk_instance.instance, "vkCmdInsertDebugUtilsLabelEXT"); + + } + vk_perf_logger_enabled = getenv("GGML_VK_PERF_LOGGER") != nullptr; // Emulate behavior of CUDA_VISIBLE_DEVICES for Vulkan @@ -4084,6 +4280,7 @@ static void * ggml_vk_host_malloc(vk_device& device, size_t size) { return nullptr; } + std::lock_guard guard(device->mutex); device->pinned_memory.push_back(std::make_tuple(buf->ptr, size, buf)); return buf->ptr; @@ -4094,6 +4291,8 @@ static void ggml_vk_host_free(vk_device& device, void* ptr) { return; } VK_LOG_MEMORY("ggml_vk_host_free(" << ptr << ")"); + std::lock_guard guard(device->mutex); + vk_buffer buf; size_t index; for (size_t i = 0; i 
< device->pinned_memory.size(); i++) { @@ -4116,6 +4315,7 @@ static void ggml_vk_host_free(vk_device& device, void* ptr) { } static void ggml_vk_host_get(vk_device& device, const void * ptr, vk_buffer& buf, size_t& buf_offset) { + std::lock_guard guard(device->mutex); buf = nullptr; buf_offset = 0; for (size_t i = 0; i < device->pinned_memory.size(); i++) { @@ -4417,7 +4617,7 @@ static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void * memcpy((uint8_t *)dst->ptr + offset + i * width, (const uint8_t *) src + i * spitch, width); } } else { - std::lock_guard guard(dst->device->mutex); + std::lock_guard guard(dst->device->mutex); vk_context subctx = ggml_vk_create_temporary_context(dst->device->transfer_queue.cmd_pool); ggml_vk_ctx_begin(dst->device, subctx); @@ -4508,7 +4708,7 @@ static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_ memcpy(dst, (uint8_t *) src->ptr + offset, size); } else { - std::lock_guard guard(src->device->mutex); + std::lock_guard guard(src->device->mutex); vk_context subctx = ggml_vk_create_temporary_context(src->device->transfer_queue.cmd_pool); ggml_vk_ctx_begin(src->device, subctx); @@ -4538,7 +4738,7 @@ static void ggml_vk_buffer_copy_async(vk_context& ctx, vk_buffer& dst, size_t ds static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& src, size_t src_offset, size_t size) { if (src->device == dst->device) { - std::lock_guard guard(src->device->mutex); + std::lock_guard guard(src->device->mutex); VK_LOG_DEBUG("ggml_vk_buffer_copy(SINGLE_DEVICE, " << size << ")"); // Copy within the device vk_context subctx = ggml_vk_create_temporary_context(src->device->transfer_queue.cmd_pool); @@ -4573,7 +4773,7 @@ static void ggml_vk_buffer_memset_async(vk_context& ctx, vk_buffer& dst, size_t static void ggml_vk_buffer_memset(vk_buffer& dst, size_t offset, uint32_t c, size_t size) { VK_LOG_DEBUG("ggml_vk_buffer_memset(" << offset << ", " << c << ", " << size << ")"); - std::lock_guard 
guard(dst->device->mutex); + std::lock_guard guard(dst->device->mutex); vk_context subctx = ggml_vk_create_temporary_context(dst->device->transfer_queue.cmd_pool); ggml_vk_ctx_begin(dst->device, subctx); subctx->s->buffer.fillBuffer(dst->buffer, offset, size, c); @@ -4722,7 +4922,7 @@ static bool ggml_vk_dim01_contiguous(const ggml_tensor * tensor) { return tensor->nb[0] == ggml_type_size(tensor->type) && tensor->nb[1] == (tensor->nb[0]*tensor->ne[0])/ggml_blck_size(tensor->type) && - tensor->nb[3] == tensor->nb[2]*tensor->ne[2]; + (tensor->ne[3] == 1 || tensor->nb[3] == tensor->nb[2]*tensor->ne[2]); } static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const ggml_tensor * src, const ggml_tensor * dst, ggml_type to) { @@ -4800,9 +5000,17 @@ static vk_pipeline ggml_vk_get_cpy_pipeline(ggml_backend_vk_context * ctx, const // type size must be exactly 2 or 4. GGML_ASSERT(ggml_is_quantized(to) || ggml_type_size(src->type) == 2 || ggml_type_size(src->type) == 4); if ((ggml_type_size(src->type) % 4) == 0) { - return ctx->device->pipeline_contig_cpy_f32_f32; + if (contig) { + return ctx->device->pipeline_contig_cpy_f32_f32; + } else { + return ctx->device->pipeline_cpy_f32_f32; + } } else { - return ctx->device->pipeline_contig_cpy_f16_f16; + if (contig) { + return ctx->device->pipeline_contig_cpy_f16_f16; + } else { + return ctx->device->pipeline_cpy_f16_f16; + } } } @@ -4863,7 +5071,7 @@ static void ggml_vk_mul_mat_q_f16(ggml_backend_vk_context * ctx, vk_context& sub std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", 
nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3]; std::cerr << "), " << (dryrun ? "dryrun" : "") << ")"); - GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT + GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16); // NOLINT GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT const uint64_t ne00 = src0->ne[0]; @@ -5091,7 +5299,7 @@ static void ggml_vk_mul_mat_vec_q_f16(ggml_backend_vk_context * ctx, vk_context& std::cerr << "), (" << src1 << ", name=" << src1->name << ", type=" << src1->type << ", ne0=" << src1->ne[0] << ", ne1=" << src1->ne[1] << ", ne2=" << src1->ne[2] << ", ne3=" << src1->ne[3] << ", nb0=" << src1->nb[0] << ", nb1=" << src1->nb[1] << ", nb2=" << src1->nb[2] << ", nb3=" << src1->nb[3]; std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3]; std::cerr << "), " << (dryrun ? 
"dryrun" : "") << "),)"); - GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT + GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16); // NOLINT GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT const uint64_t ne00 = src0->ne[0]; @@ -5692,7 +5900,7 @@ static void ggml_vk_mul_mat_vec_id_q_f16(ggml_backend_vk_context * ctx, vk_conte std::cerr << "), (" << ids << ", name=" << ids->name << ", type=" << ids->type << ", ne0=" << ids->ne[0] << ", ne1=" << ids->ne[1] << ", ne2=" << ids->ne[2] << ", ne3=" << ids->ne[3] << ", nb0=" << ids->nb[0] << ", nb1=" << ids->nb[1] << ", nb2=" << ids->nb[2] << ", nb3=" << ids->nb[3]; std::cerr << "), (" << dst << ", name=" << dst->name << ", type=" << dst->type << ", ne0=" << dst->ne[0] << ", ne1=" << dst->ne[1] << ", ne2=" << dst->ne[2] << ", ne3=" << dst->ne[3] << ", nb0=" << dst->nb[0] << ", nb1=" << dst->nb[1] << ", nb2=" << dst->nb[2] << ", nb3=" << dst->nb[3]; std::cerr << "), " << (dryrun ? 
"dryrun" : "") << ")"); - GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16); // NOLINT + GGML_ASSERT(ggml_vk_dim01_contiguous(src0) || src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || src0->type == GGML_TYPE_BF16); // NOLINT GGML_ASSERT(ggml_vk_dim01_contiguous(src1) || src1->type == GGML_TYPE_F32 || src1->type == GGML_TYPE_F16); // NOLINT GGML_ASSERT(ids->type == GGML_TYPE_I32); @@ -5886,14 +6094,60 @@ static void ggml_vk_mul_mat_id(ggml_backend_vk_context * ctx, vk_context& subctx if (src2->ne[1] == 1 && (src0->type == GGML_TYPE_F32 || src0->type == GGML_TYPE_F16 || ggml_is_quantized(src0->type))) { ggml_vk_mul_mat_vec_id_q_f16(ctx, subctx, src0, src1, src2, dst, dryrun); } else { - ggml_vk_mul_mat_id_q_f16(ctx, subctx, src0, src1, src2, dst, dryrun); + // Split based on number of ids, to fit in shared memory + const uint32_t nei0 = (uint32_t)src2->ne[0]; + const uint32_t nei1 = (uint32_t)src2->ne[1]; + + GGML_ASSERT(nei0 <= 4096); + const uint32_t split_size = std::min(nei1, 4096u / nei0); + + ggml_tensor src1_copy = *src1; + ggml_tensor src2_copy = *src2; + ggml_tensor dst_copy = *dst; + + for (uint32_t token_start = 0; token_start < nei1; token_start += split_size) { + const uint32_t n_tokens = std::min(split_size, nei1 - token_start); + + src1_copy.view_offs = src1->view_offs + token_start * src1_copy.nb[2]; + src2_copy.view_offs = src2->view_offs + token_start * src2_copy.nb[1]; + dst_copy.view_offs = dst->view_offs + token_start * dst_copy.nb[2]; + + src1_copy.ne[2] = n_tokens; + src2_copy.ne[1] = n_tokens; + dst_copy.ne[2] = n_tokens; + + ggml_vk_mul_mat_id_q_f16(ctx, subctx, src0, &src1_copy, &src2_copy, &dst_copy, dryrun); + } } } -static bool ggml_vk_flash_attn_coopmat_shmem_support(const vk_device& device, const uint32_t D, bool f32acc) { +static bool ggml_vk_flash_attn_scalar_shmem_support(const vk_device& device, const uint32_t hsk, uint32_t hsv) { + // Needs to be kept up to date 
on shader changes + GGML_UNUSED(hsv); + const uint32_t wg_size = scalar_flash_attention_workgroup_size; + const uint32_t Br = get_fa_scalar_num_large_rows(hsv); + const uint32_t Bc = scalar_flash_attention_Bc; + + const uint32_t tmpsh = wg_size * sizeof(float); + const uint32_t tmpshv4 = wg_size * 4 * sizeof(float); + + const uint32_t masksh = Bc * Br * sizeof(float); + + const uint32_t Qf = Br * (hsk / 4 + 2) * 4 * sizeof(float); + + const uint32_t total_size = tmpsh + tmpshv4 + masksh + Qf; + const bool supported = total_size <= device->properties.limits.maxComputeSharedMemorySize; + + VK_LOG_DEBUG("ggml_vk_flash_attn_coopmat_shmem_support(HSK=" << hsk << ", HSV=" << hsv << ", total_size=" << total_size << ", supported=" << supported); + + return supported; +} + +static bool ggml_vk_flash_attn_coopmat_shmem_support(const vk_device& device, const uint32_t hsk, uint32_t hsv, bool f32acc) { // Needs to be kept up to date on shader changes + GGML_UNUSED(hsv); const uint32_t wg_size = scalar_flash_attention_workgroup_size; - const uint32_t Br = scalar_flash_attention_num_large_rows; + const uint32_t Br = coopmat1_flash_attention_num_large_rows; const uint32_t Bc = scalar_flash_attention_Bc; const uint32_t acctype = f32acc ? 4 : 2; @@ -5902,12 +6156,12 @@ static bool ggml_vk_flash_attn_coopmat_shmem_support(const vk_device& device, co const uint32_t tmpsh = wg_size * sizeof(float); const uint32_t tmpshv4 = wg_size * 4 * acctype; - const uint32_t Qf = Br * (D / 4 + 2) * f16vec4; + const uint32_t Qf = Br * (hsk / 4 + 2) * f16vec4; - const uint32_t sfshstride = (D <= 128) ? (Br + 8) : Br; + const uint32_t sfshstride = (hsk <= 128) ? 
(Br + 8) : Br; const uint32_t sfsh = Bc * sfshstride * acctype; - const uint32_t kshstride = D / 4 + 2; + const uint32_t kshstride = hsk / 4 + 2; const uint32_t ksh = Bc * kshstride * f16vec4; const uint32_t slope = Br * sizeof(float); @@ -5915,7 +6169,7 @@ static bool ggml_vk_flash_attn_coopmat_shmem_support(const vk_device& device, co const uint32_t total_size = tmpsh + tmpshv4 + Qf + sfsh + ksh + slope; const bool supported = total_size <= device->properties.limits.maxComputeSharedMemorySize; - VK_LOG_DEBUG("ggml_vk_flash_attn_coopmat_shmem_support(D=" << D << ", f32acc=" << f32acc << ", total_size=" << total_size << ", supported=" << supported); + VK_LOG_DEBUG("ggml_vk_flash_attn_coopmat_shmem_support(HSK=" << hsk << ", HSV=" << hsv << ", f32acc=" << f32acc << ", total_size=" << total_size << ", supported=" << supported); return supported; } @@ -5937,13 +6191,15 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx GGML_TENSOR_LOCALS(size_t, nb, dst, nb) const uint32_t nem1 = mask ? mask->ne[1] : 0; - const uint32_t nbm1 = mask ? mask->nb[1] : 0; + const uint32_t nem2 = mask ? mask->ne[2] : 0; + const uint32_t nem3 = mask ? 
mask->ne[3] : 0; - const uint32_t D = neq0; + const uint32_t HSK = nek0; + const uint32_t HSV = nev0; uint32_t N = neq1; const uint32_t KV = nek1; - GGML_ASSERT(ne0 == D); + GGML_ASSERT(ne0 == HSV); GGML_ASSERT(ne2 == N); // input tensor rows must be contiguous @@ -5951,12 +6207,9 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx GGML_ASSERT(nbk0 == ggml_type_size(k->type)); GGML_ASSERT(nbv0 == ggml_type_size(v->type)); - GGML_ASSERT(neq0 == D); - GGML_ASSERT(nek0 == D); - GGML_ASSERT(nev0 == D); + GGML_ASSERT(neq0 == HSK); GGML_ASSERT(neq1 == N); - GGML_ASSERT(nev0 == D); GGML_ASSERT(nev1 == nek1); @@ -5977,7 +6230,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx const bool coopmat_shape_supported = (dst->op_params[3] == GGML_PREC_F32 && ctx->device->coopmat_support_16x16x16_f32acc) || (dst->op_params[3] != GGML_PREC_F32 && ctx->device->coopmat_support_16x16x16_f16acc); - const bool coopmat_shmem_supported = ggml_vk_flash_attn_coopmat_shmem_support(ctx->device, D, dst->op_params[3] == GGML_PREC_F32); + const bool coopmat_shmem_supported = ggml_vk_flash_attn_coopmat_shmem_support(ctx->device, HSK, HSV, dst->op_params[3] == GGML_PREC_F32); if (!coopmat_shape_supported || !coopmat_shmem_supported) { path = FA_SCALAR; @@ -5997,7 +6250,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx case FA_SCALAR: case FA_COOPMAT1: // We may switch from coopmat1 to scalar, so use the scalar limit for both - max_gqa = scalar_flash_attention_num_large_rows; + max_gqa = get_fa_scalar_num_large_rows(HSV); break; case FA_COOPMAT2: max_gqa = get_fa_num_small_rows(FA_COOPMAT2); @@ -6007,7 +6260,7 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx } if (N == 1 && qk_ratio > 1 && qk_ratio <= max_gqa && - qk_ratio * nek2 == neq2 && nek2 == nev2 && neq3 == 1 && nek3 == 1 && nev3 == 1) { + qk_ratio * nek2 == neq2 && nek2 == nev2 && nem2 <= 1) { // grouped 
query attention - make the N dimension equal to gqa_ratio, reduce // workgroups proportionally in y dimension. The shader will detect gqa_ratio > 1 // and change addressing calculations to index Q's dimension 2. @@ -6030,47 +6283,25 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx path = FA_SCALAR; } + // with large hsk/hsv, scalar path may need to use small_rows to fit in shared memory + if (path == FA_SCALAR && + !ggml_vk_flash_attn_scalar_shmem_support(ctx->device, HSK, HSV)) { + small_rows = true; + } + bool f32acc = path == FA_SCALAR || dst->op_params[3] == GGML_PREC_F32; + FaHeadSizes head_sizes = fa_get_head_sizes(k->ne[0], v->ne[0]); + switch (path) { case FA_SCALAR: - switch (D) { - case 64: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D64[k->type][f32acc][small_rows][0]; break; - case 80: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D80[k->type][f32acc][small_rows][0]; break; - case 96: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D96[k->type][f32acc][small_rows][0]; break; - case 112: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D112[k->type][f32acc][small_rows][0]; break; - case 128: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D128[k->type][f32acc][small_rows][0]; break; - case 256: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D256[k->type][f32acc][small_rows][0]; break; - default: - GGML_ASSERT(!"unsupported D value"); - return; - } + pipelines = &ctx->device->pipeline_flash_attn_f32_f16[k->type][head_sizes][f32acc][small_rows][0]; break; case FA_COOPMAT1: - switch (D) { - case 64: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D64_cm1[k->type][f32acc][small_rows][0]; break; - case 80: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D80_cm1[k->type][f32acc][small_rows][0]; break; - case 96: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D96_cm1[k->type][f32acc][small_rows][0]; break; - case 112: pipelines = 
&ctx->device->pipeline_flash_attn_f32_f16_D112_cm1[k->type][f32acc][small_rows][0]; break; - case 128: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D128_cm1[k->type][f32acc][small_rows][0]; break; - case 256: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D256_cm1[k->type][f32acc][small_rows][0]; break; - default: - GGML_ASSERT(!"unsupported D value"); - return; - } + pipelines = &ctx->device->pipeline_flash_attn_f32_f16_cm1[k->type][head_sizes][f32acc][small_rows][0]; break; case FA_COOPMAT2: - switch (D) { - case 64: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D64_cm2[k->type][f32acc][small_rows][0]; break; - case 80: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D80_cm2[k->type][f32acc][small_rows][0]; break; - case 96: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D96_cm2[k->type][f32acc][small_rows][0]; break; - case 112: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D112_cm2[k->type][f32acc][small_rows][0]; break; - case 128: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D128_cm2[k->type][f32acc][small_rows][0]; break; - case 256: pipelines = &ctx->device->pipeline_flash_attn_f32_f16_D256_cm2[k->type][f32acc][small_rows][0]; break; - default: - GGML_ASSERT(!"unsupported D value"); - return; - } + pipelines = &ctx->device->pipeline_flash_attn_f32_f16_cm2[k->type][head_sizes][f32acc][small_rows][0]; break; default: GGML_ASSERT(0); @@ -6098,21 +6329,21 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx const uint32_t shader_core_count = ctx->device->shader_core_count ? ctx->device->shader_core_count : 16; // Try to use split_k when KV is large enough to be worth the overhead - if (workgroups_x == 1 && shader_core_count > 0 && KV >= 512) { + if (workgroups_x == 1 && shader_core_count > 0) { // Try to run two workgroups per SM. 
- split_k = ctx->device->shader_core_count * 2 / workgroups_y; + split_k = shader_core_count * 2 / (workgroups_y * workgroups_z); if (split_k > 1) { // Try to evenly split KV into split_k chunks, but it needs to be a multiple // of "align", so recompute split_k based on that. - split_kv = ROUNDUP_POW2(KV / split_k, pipelines[1]->align); + split_kv = ROUNDUP_POW2(std::max(1u, KV / split_k), pipelines[1]->align); split_k = CEIL_DIV(KV, split_kv); workgroups_x = split_k; } } - // Reserve space for split_k temporaries. For each split, we need to store the O matrix (D x ne1) - // and the per-row m and L values (ne1 rows). - const uint64_t split_k_size = split_k > 1 ? (D * ne1 * sizeof(float) + ne1 * sizeof(float) * 2) * split_k : 0; + // Reserve space for split_k temporaries. For each split x batch, we need to store the O matrix (D x ne1) + // and the per-row m and L values (ne1 rows). We store all the matrices first, followed by the rows. + const uint64_t split_k_size = split_k > 1 ? (HSV * ne1 * sizeof(float) + ne1 * sizeof(float) * 2) * split_k * ne3 : 0; if (split_k_size > ctx->device->max_memory_allocation_size) { GGML_ABORT("Requested preallocation size is too large"); } @@ -6199,18 +6430,19 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx } } + uint32_t mask_n_head_log2 = ((mask != nullptr) << 16) | n_head_log2; + const vk_flash_attn_push_constants pc = { N, KV, (uint32_t)ne1, (uint32_t)ne2, (uint32_t)ne3, (uint32_t)neq2, (uint32_t)neq3, (uint32_t)nek2, (uint32_t)nek3, (uint32_t)nev2, (uint32_t)nev3, - nem1, + nem1, nem2, nem3, q_stride, (uint32_t)nbq2, (uint32_t)nbq3, k_stride, (uint32_t)nbk2, (uint32_t)nbk3, v_stride, (uint32_t)nbv2, (uint32_t)nbv3, - nbm1, scale, max_bias, logit_softcap, - mask != nullptr, n_head_log2, m0, m1, + mask_n_head_log2, m0, m1, gqa_ratio, split_kv, split_k }; ggml_vk_sync_buffers(subctx); @@ -6231,13 +6463,13 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx pc, { 
workgroups_x * pipeline->wg_denoms[0], workgroups_y, workgroups_z }); ggml_vk_sync_buffers(subctx); - const std::array pc2 = { D, (uint32_t)ne1, split_k }; + const std::array pc2 = { HSV, (uint32_t)ne1, (uint32_t)ne3, split_k }; ggml_vk_dispatch_pipeline(ctx, subctx, ctx->device->pipeline_flash_attn_split_k_reduce, { vk_subbuffer{ctx->prealloc_split_k, 0, VK_WHOLE_SIZE}, vk_subbuffer{d_D, d_buf_offset, VK_WHOLE_SIZE}, }, - pc2, { (uint32_t)ne1, 1, 1 }); + pc2, { (uint32_t)ne1, HSV, (uint32_t)ne3 }); } else { ggml_vk_dispatch_pipeline(ctx, subctx, pipeline, { @@ -6313,8 +6545,16 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const } return nullptr; case GGML_OP_UPSCALE: - if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32 && dst->op_params[0] == GGML_SCALE_MODE_NEAREST) { - return ctx->device->pipeline_upscale_f32; + if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { + int mode = ggml_get_op_params_i32(dst, 0); + switch (mode) { + case GGML_SCALE_MODE_NEAREST: + return ctx->device->pipeline_upscale_nearest_f32; + case GGML_SCALE_MODE_BILINEAR: + return ctx->device->pipeline_upscale_bilinear_f32; + case GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS: + return ctx->device->pipeline_upscale_bilinear_ac_f32; + } } return nullptr; case GGML_OP_SCALE: @@ -6347,6 +6587,11 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const return ctx->device->pipeline_pad_f32; } return nullptr; + case GGML_OP_ROLL: + if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { + return ctx->device->pipeline_roll_f32; + } + return nullptr; case GGML_OP_REPEAT: if (ggml_type_size(src0->type) == sizeof(float) && ggml_type_size(dst->type) == sizeof(float)) { return ctx->device->pipeline_repeat_f32; @@ -6361,6 +6606,8 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const case GGML_OP_CONT: case GGML_OP_DUP: return ggml_vk_get_cpy_pipeline(ctx, src0, dst, dst->type); + 
case GGML_OP_SET_ROWS: + return ctx->device->pipeline_set_rows[dst->type]; case GGML_OP_SILU_BACK: if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { return ctx->device->pipeline_silu_back_f32; @@ -6378,7 +6625,7 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const return nullptr; case GGML_OP_RMS_NORM: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { - return ctx->device->pipeline_rms_norm_f32; + return ctx->num_additional_fused_ops > 0 ? ctx->device->pipeline_rms_norm_mul_f32 : ctx->device->pipeline_rms_norm_f32; } return nullptr; case GGML_OP_RMS_NORM_BACK: @@ -6403,6 +6650,8 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const return ctx->device->pipeline_silu[dst->type == GGML_TYPE_F16]; case GGML_UNARY_OP_GELU: return ctx->device->pipeline_gelu[dst->type == GGML_TYPE_F16]; + case GGML_UNARY_OP_GELU_ERF: + return ctx->device->pipeline_gelu_erf[dst->type == GGML_TYPE_F16]; case GGML_UNARY_OP_GELU_QUICK: return ctx->device->pipeline_gelu_quick[dst->type == GGML_TYPE_F16]; case GGML_UNARY_OP_RELU: @@ -6415,6 +6664,28 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const break; } return nullptr; + case GGML_OP_GLU: + if ((src0->type != GGML_TYPE_F32 && src0->type != GGML_TYPE_F16) || + (dst->type != GGML_TYPE_F32 && dst->type != GGML_TYPE_F16) || + (src0->type != dst->type)) { + return nullptr; + } + + switch (ggml_get_glu_op(dst)) { + case GGML_GLU_OP_GEGLU: + return ctx->device->pipeline_geglu[dst->type == GGML_TYPE_F16]; + case GGML_GLU_OP_REGLU: + return ctx->device->pipeline_reglu[dst->type == GGML_TYPE_F16]; + case GGML_GLU_OP_SWIGLU: + return ctx->device->pipeline_swiglu[dst->type == GGML_TYPE_F16]; + case GGML_GLU_OP_GEGLU_ERF: + return ctx->device->pipeline_geglu_erf[dst->type == GGML_TYPE_F16]; + case GGML_GLU_OP_GEGLU_QUICK: + return ctx->device->pipeline_geglu_quick[dst->type == GGML_TYPE_F16]; + default: 
+ break; + } + return nullptr; case GGML_OP_DIAG_MASK_INF: if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) { return ctx->device->pipeline_diag_mask_inf_f32; @@ -6575,6 +6846,7 @@ static bool ggml_vk_op_supports_incontiguous(ggml_op op) { case GGML_OP_RMS_NORM: case GGML_OP_CONV_2D_DW: case GGML_OP_IM2COL: + case GGML_OP_SET_ROWS: return true; default: return false; @@ -6869,12 +7141,14 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co case GGML_OP_COS: case GGML_OP_CLAMP: case GGML_OP_PAD: + case GGML_OP_ROLL: case GGML_OP_REPEAT: case GGML_OP_REPEAT_BACK: case GGML_OP_CPY: case GGML_OP_CONCAT: case GGML_OP_UPSCALE: case GGML_OP_UNARY: + case GGML_OP_GLU: case GGML_OP_CONV_2D_DW: { uint32_t ne = ggml_nelements(dst); @@ -6887,6 +7161,12 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co ne *= ggml_type_size(src0->type) / 2; } } + // copy_to_quant has block size of 32, and each thread does QUANT_K elements. + // Splitting into 512x512xZ wouldn't work well since each workgroup does 1024 elements. + // So divide by block size here before splitting into 512x512 groups. 
+ if (op == GGML_OP_CPY && !ggml_is_quantized(src0->type) && ggml_is_quantized(dst->type)) { + ne = CEIL_DIV(ne, ggml_blck_size(dst->type)); + } if (ne > 262144) { elements = { 512, 512, CEIL_DIV(ne, 262144) }; } else if (ne > 512) { @@ -6895,6 +7175,25 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co elements = { ne, 1, 1 }; } } break; + case GGML_OP_SET_ROWS: + { + uint32_t ne = ggml_nelements(src0); + if (ggml_is_quantized(dst->type)) { + // quants run 32 threads each doing QUANT_K elements + ne = CEIL_DIV(ne, 32 * ggml_blck_size(dst->type)); + } else { + // scalar types do one element per thread, running 512 threads + ne = CEIL_DIV(ne, 512); + } + if (ne > 262144) { + elements = { 512, 512, CEIL_DIV(ne, 262144) }; + } else if (ne > 512) { + elements = { 512, CEIL_DIV(ne, 512), 1 }; + } else { + elements = { ne, 1, 1 }; + } + } + break; default: elements = { (uint32_t)ggml_nelements(src0), 1, 1 }; break; @@ -6915,7 +7214,7 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co } } - if (op == GGML_OP_SOFT_MAX) { + if (op == GGML_OP_SOFT_MAX || op == GGML_OP_GLU) { // Empty src1 is possible in soft_max, but the shader needs a buffer vk_subbuffer subbuf_y; if (use_src1) { @@ -7304,14 +7603,21 @@ static void ggml_vk_concat(ggml_backend_vk_context * ctx, vk_context& subctx, co static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { const uint32_t src0_type_size = ggml_type_size(src0->type); + const uint32_t mode = (uint32_t)ggml_get_op_params_i32(dst, 0); - const float sf0 = (float)dst->ne[0] / src0->ne[0]; - const float sf1 = (float)dst->ne[1] / src0->ne[1]; - const float sf2 = (float)dst->ne[2] / src0->ne[2]; - const float sf3 = (float)dst->ne[3] / src0->ne[3]; + float sf0 = (float)dst->ne[0] / src0->ne[0]; + float sf1 = (float)dst->ne[1] / src0->ne[1]; + float sf2 = (float)dst->ne[2] / src0->ne[2]; + float sf3 
= (float)dst->ne[3] / src0->ne[3]; + + if (mode & GGML_SCALE_FLAG_ALIGN_CORNERS) { + sf0 = (float)(dst->ne[0] - 1) / (src0->ne[0] - 1); + sf1 = (float)(dst->ne[1] - 1) / (src0->ne[1] - 1); + } ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_UPSCALE, { (uint32_t)ggml_nelements(dst), 0, 0, + (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, (uint32_t)dst->ne[0], (uint32_t)dst->ne[1], (uint32_t)dst->ne[2],(uint32_t)dst->ne[3], sf0, sf1, sf2, sf3, @@ -7319,123 +7625,64 @@ static void ggml_vk_upscale(ggml_backend_vk_context * ctx, vk_context& subctx, c } static void ggml_vk_scale(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - float * op_params = (float *)dst->op_params; - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); + vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst); + p.param1 = ggml_get_op_params_f32(dst, 0); + p.param2 = ggml_get_op_params_f32(dst, 1); - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SCALE, { - (uint32_t)ggml_nelements(src0), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - op_params[0], 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, dryrun); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SCALE, std::move(p), dryrun); 
} static void ggml_vk_sqr(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); - - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SQR, { - (uint32_t)ggml_nelements(src0), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - 0.0f, 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, dryrun); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SQR, vk_op_unary_push_constants_init(src0, dst), dryrun); } static void ggml_vk_sin(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); - - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SIN, { - (uint32_t)ggml_nelements(src0), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - 0.0f, 0.0f, - 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, - }, dryrun); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_SIN, vk_op_unary_push_constants_init(src0, dst), dryrun); } static void ggml_vk_cos(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); - - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_COS, { - (uint32_t)ggml_nelements(src0), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - 0.0f, 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, dryrun); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_COS, vk_op_unary_push_constants_init(src0, dst), dryrun); } static void ggml_vk_clamp(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - float * op_params = (float *)dst->op_params; - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); + vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst); + p.param1 = ggml_get_op_params_f32(dst, 0); + p.param2 = ggml_get_op_params_f32(dst, 1); - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CLAMP, { - (uint32_t)ggml_nelements(src0), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, 
(uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - op_params[0], op_params[1], - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, dryrun); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CLAMP, std::move(p), dryrun); } static void ggml_vk_pad(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); + vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_PAD, std::move(p), dryrun); +} - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_PAD, { - (uint32_t)ggml_nelements(dst), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - 0.0f, 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, dryrun); +static void ggml_vk_roll(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { + const int32_t s0 = ggml_get_op_params_i32(dst, 0); + const int32_t s1 = ggml_get_op_params_i32(dst, 1); + const int32_t s2 = ggml_get_op_params_i32(dst, 2); + const int32_t s3 = 
ggml_get_op_params_i32(dst, 3); + const uint32_t s01_packed = ((s0 + 0x8000) << 16) | (s1 + 0x8000); + const uint32_t s23_packed = ((s2 + 0x8000) << 16) | (s3 + 0x8000); + + vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst); + memcpy(&p.param1, &s01_packed, sizeof(float)); + memcpy(&p.param2, &s23_packed, sizeof(float)); + + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_ROLL, std::move(p), dryrun); } static void ggml_vk_repeat(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); - - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT, { - (uint32_t)ggml_nelements(dst), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - 0.0f, 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, dryrun); + vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT, std::move(p), dryrun); } static void ggml_vk_repeat_back(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); - - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT_BACK, { - (uint32_t)ggml_nelements(dst), - 
(uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, - 0, - 0.0f, 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - }, dryrun); + vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst, ggml_nelements(dst)); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_REPEAT_BACK, std::move(p), dryrun); } static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - const uint32_t src0_type_size = ggml_type_size(src0->type); - const uint32_t dst_type_size = ggml_type_size(dst->type); - uint32_t ne = (uint32_t)ggml_nelements(src0); if (ggml_is_quantized(src0->type) && ggml_is_quantized(dst->type)) { // Convert from number of logical elements to 2- or 4-byte units. 
@@ -7447,13 +7694,22 @@ static void ggml_vk_cpy(ggml_backend_vk_context * ctx, vk_context& subctx, const } } - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, { - ne, - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, + vk_op_unary_push_constants p = vk_op_unary_push_constants_init(src0, dst, ne); + ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_CPY, std::move(p), dryrun); +} + +static void ggml_vk_set_rows(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) { + const uint32_t src0_type_size = ggml_type_size(src0->type); + const uint32_t src1_type_size = ggml_type_size(src1->type); + const uint32_t dst_type_size = ggml_type_size(dst->type); + + ggml_vk_op_f32(ctx, subctx, src0, src1, nullptr, dst, GGML_OP_SET_ROWS, { + (uint32_t)ggml_nelements(src0), + (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2],(uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, + (uint32_t)src1->ne[0], (uint32_t)src1->ne[1], (uint32_t)src1->ne[2],(uint32_t)src1->ne[3], (uint32_t)src1->nb[0] / src1_type_size, (uint32_t)src1->nb[1] / src1_type_size, (uint32_t)src1->nb[2] / src1_type_size, (uint32_t)src1->nb[3] / src1_type_size, + (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2],(uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) 
dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, - 0.0f, 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0.0f, 0.0f, 0, }, dryrun); } @@ -7478,18 +7734,18 @@ static void ggml_vk_group_norm(ggml_backend_vk_context * ctx, vk_context& subctx ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_GROUP_NORM, { group_size, 0, eps, 0.0f }, dryrun); } -static void ggml_vk_rms_norm(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { - float * op_params = (float *)dst->op_params; +static void ggml_vk_rms_norm(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, float * op_params, bool dryrun = false) { const uint32_t src0_type_size = ggml_type_size(src0->type); + const uint32_t src1_type_size = ggml_type_size(src1->type); const uint32_t dst_type_size = ggml_type_size(dst->type); - ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_RMS_NORM, { + ggml_vk_op_f32(ctx, subctx, src0, src1, nullptr, dst, GGML_OP_RMS_NORM, { (uint32_t)ggml_nelements(src0), - (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], (uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, - (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2], (uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, + (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2],(uint32_t)src0->ne[3], (uint32_t)src0->nb[0] / src0_type_size, (uint32_t)src0->nb[1] / src0_type_size, (uint32_t)src0->nb[2] / src0_type_size, (uint32_t)src0->nb[3] / src0_type_size, + (uint32_t)src1->ne[0], (uint32_t)src1->ne[1], 
(uint32_t)src1->ne[2],(uint32_t)src1->ne[3], (uint32_t)src1->nb[0] / src1_type_size, (uint32_t)src1->nb[1] / src1_type_size, (uint32_t)src1->nb[2] / src1_type_size, (uint32_t)src1->nb[3] / src1_type_size, + (uint32_t) dst->ne[0], (uint32_t) dst->ne[1], (uint32_t) dst->ne[2],(uint32_t) dst->ne[3], (uint32_t) dst->nb[0] / dst_type_size, (uint32_t) dst->nb[1] / dst_type_size, (uint32_t) dst->nb[2] / dst_type_size, (uint32_t) dst->nb[3] / dst_type_size, 0, - op_params[0], 0.0f, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + op_params[0], 0.0f, 0, }, dryrun); } @@ -7507,6 +7763,25 @@ static void ggml_vk_unary(ggml_backend_vk_context * ctx, vk_context& subctx, con ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_UNARY, { (uint32_t)ggml_nelements(src0), 0, 0.0f, 0.0f }, dryrun); } +static void ggml_vk_glu(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst, bool dryrun = false) { + const bool swapped = (bool)dst->op_params[1]; + const bool split = src1 != nullptr; + + GGML_ASSERT(ggml_is_contiguous(src0)); + + if (!split) { + GGML_ASSERT(src0->ne[0] / 2 == dst->ne[0]); + } else { + GGML_ASSERT(src0->ne[0] == src1->ne[0]); + GGML_ASSERT(src0->ne[0] == dst->ne[0]); + GGML_ASSERT(src0->type == src1->type); + } + + const uint32_t mode = split ? 2 : (swapped ? 
1 : 0); + + ggml_vk_op_f32(ctx, subctx, src0, src1, nullptr, dst, GGML_OP_GLU, { (uint32_t)ggml_nelements(dst), (uint32_t)src0->ne[0], (uint32_t)dst->ne[0], mode }, dryrun); +} + static void ggml_vk_diag_mask_inf(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst, bool dryrun = false) { int32_t * op_params = (int32_t *)dst->op_params; ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, dst, GGML_OP_DIAG_MASK_INF, { (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], op_params[0] }, dryrun); @@ -7522,7 +7797,13 @@ static void ggml_vk_soft_max(ggml_backend_vk_context * ctx, vk_context& subctx, const uint32_t nrows_x = (uint32_t)ggml_nrows(src0); const uint32_t nrows_y = (uint32_t)src0->ne[1]; - const uint32_t n_head_kv = nrows_x/nrows_y; + const uint32_t ne12 = src1 ? (uint32_t)(src1->ne[2]) : 0u; + const uint32_t ne13 = src1 ? (uint32_t)(src1->ne[3]) : 0u; + const uint32_t nb11 = src1 ? (uint32_t)(src1->nb[1] / src1->nb[0]) : 0u; + const uint32_t nb12 = src1 ? (uint32_t)(src1->nb[2] / src1->nb[0]) : 0u; + const uint32_t nb13 = src1 ? (uint32_t)(src1->nb[3] / src1->nb[0]) : 0u; + + const uint32_t n_head_kv = src0->ne[2]; const uint32_t n_head_log2 = 1u << (uint32_t) floorf(log2f((float) n_head_kv)); const float m0 = powf(2.0f, -(max_bias ) / n_head_log2); @@ -7531,6 +7812,9 @@ static void ggml_vk_soft_max(ggml_backend_vk_context * ctx, vk_context& subctx, ggml_vk_op_f32(ctx, subctx, src0, src1, nullptr, dst, GGML_OP_SOFT_MAX, { ncols, src1 != nullptr ? 
nrows_y : (uint32_t)0, + (uint32_t)src0->ne[0], (uint32_t)src0->ne[1], (uint32_t)src0->ne[2], + ne12, ne13, + nb11, nb12, nb13, scale, max_bias, m0, m1, n_head_log2, @@ -8680,11 +8964,12 @@ static void ggml_vk_preallocate_buffers(ggml_backend_vk_context * ctx) { } } -static bool ggml_vk_compute_forward(ggml_backend_vk_context* ctx, ggml_tensor* tensor, int tensor_idx, bool use_fence, bool almost_ready); +static bool ggml_vk_compute_forward(ggml_backend_vk_context* ctx, ggml_cgraph * cgraph, ggml_tensor* tensor, int tensor_idx, bool use_fence, bool almost_ready); // Returns true if node has enqueued work into the queue, false otherwise // If submit is true the current all operations queued so far are being submitted to Vulkan to overlap cmdlist creation and GPU execution. -static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * node, int node_idx, ggml_tensor *node_begin, int node_idx_begin, bool dryrun, bool last_node, bool almost_ready, bool submit){ +static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgraph, int node_idx, ggml_tensor *node_begin, int node_idx_begin, bool dryrun, bool last_node, bool almost_ready, bool submit){ + ggml_tensor * node = cgraph->nodes[node_idx]; if (ggml_is_empty(node) || !node->buffer) { return false; } @@ -8709,6 +8994,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod switch (ggml_get_unary_op(node)) { case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_GELU: + case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_RELU: case GGML_UNARY_OP_TANH: @@ -8718,6 +9004,18 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod return false; } break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(node)) { + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + break; + default: + return false; + } + break; case GGML_OP_REPEAT: case 
GGML_OP_REPEAT_BACK: case GGML_OP_GET_ROWS: @@ -8734,7 +9032,9 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod case GGML_OP_COS: case GGML_OP_CLAMP: case GGML_OP_PAD: + case GGML_OP_ROLL: case GGML_OP_CPY: + case GGML_OP_SET_ROWS: case GGML_OP_CONT: case GGML_OP_DUP: case GGML_OP_SILU_BACK: @@ -8801,6 +9101,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod case GGML_OP_CLAMP: case GGML_OP_PAD: case GGML_OP_CPY: + case GGML_OP_SET_ROWS: case GGML_OP_CONT: case GGML_OP_DUP: case GGML_OP_SILU_BACK: @@ -8810,6 +9111,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod case GGML_OP_RMS_NORM_BACK: case GGML_OP_L2_NORM: case GGML_OP_UNARY: + case GGML_OP_GLU: case GGML_OP_DIAG_MASK_INF: case GGML_OP_SOFT_MAX: case GGML_OP_SOFT_MAX_BACK: @@ -8902,12 +9204,20 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod case GGML_OP_PAD: ggml_vk_pad(ctx, compute_ctx, src0, node, dryrun); + break; + case GGML_OP_ROLL: + ggml_vk_roll(ctx, compute_ctx, src0, node, dryrun); + break; case GGML_OP_CPY: case GGML_OP_CONT: case GGML_OP_DUP: ggml_vk_cpy(ctx, compute_ctx, src0, node, dryrun); + break; + case GGML_OP_SET_ROWS: + ggml_vk_set_rows(ctx, compute_ctx, src0, src1, node, dryrun); + break; case GGML_OP_SILU_BACK: ggml_vk_silu_back(ctx, compute_ctx, src0, src1, node, dryrun); @@ -8922,8 +9232,14 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod break; case GGML_OP_RMS_NORM: - ggml_vk_rms_norm(ctx, compute_ctx, src0, node, dryrun); - + if (ctx->num_additional_fused_ops > 0) { + // fused rms_norm + mul + ggml_tensor *mul = cgraph->nodes[node_idx + 1]; + ggml_tensor *other_src = mul->src[0] == node ? 
mul->src[1] : mul->src[0]; + ggml_vk_rms_norm(ctx, compute_ctx, src0, other_src, mul, (float *)node->op_params, dryrun); + } else { + ggml_vk_rms_norm(ctx, compute_ctx, src0, src0, node, (float *)node->op_params, dryrun); + } break; case GGML_OP_RMS_NORM_BACK: ggml_vk_rms_norm_back(ctx, compute_ctx, src0, src1, node, dryrun); @@ -8937,6 +9253,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod switch (ggml_get_unary_op(node)) { case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_GELU: + case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_RELU: case GGML_UNARY_OP_TANH: @@ -8947,6 +9264,19 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod return false; } break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(node)) { + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + ggml_vk_glu(ctx, compute_ctx, src0, src1, node, dryrun); + break; + default: + return false; + } + break; case GGML_OP_DIAG_MASK_INF: ggml_vk_diag_mask_inf(ctx, compute_ctx, src0, node, dryrun); @@ -9068,12 +9398,13 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod ctx->compute_ctx.reset(); - bool ok = ggml_vk_compute_forward(ctx, node_begin, node_idx_begin, false, almost_ready); + bool ok = ggml_vk_compute_forward(ctx, cgraph, node_begin, node_idx_begin, false, almost_ready); if (!ok) { if (node->op == GGML_OP_UNARY) { std::cerr << __func__ << ": error: op not supported UNARY " << node->name << " (" << ggml_unary_op_name(static_cast(node->op_params[0])) << ")" << std::endl; - } - else { + } else if (node->op == GGML_OP_GLU) { + std::cerr << __func__ << ": error: op not supported GLU " << node->name << " (" << ggml_glu_op_name(static_cast(node->op_params[0])) << ")" << std::endl; + } else { std::cerr << __func__ << ": error: op not supported " << node->name << " (" << ggml_op_name(node->op) << ")" 
<< std::endl; } } @@ -9082,7 +9413,8 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_tensor * nod return true; } -static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * tensor, int tensor_idx, bool use_fence = true, bool almost_ready = false) { +static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph * cgraph, ggml_tensor * tensor, int tensor_idx, bool use_fence = true, bool almost_ready = false) { + GGML_UNUSED(cgraph); ggml_backend_buffer * buf = nullptr; switch (tensor->op) { @@ -9100,7 +9432,9 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * case GGML_OP_COS: case GGML_OP_CLAMP: case GGML_OP_PAD: + case GGML_OP_ROLL: case GGML_OP_CPY: + case GGML_OP_SET_ROWS: case GGML_OP_CONT: case GGML_OP_DUP: case GGML_OP_SILU_BACK: @@ -9142,6 +9476,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * switch (ggml_get_unary_op(tensor)) { case GGML_UNARY_OP_SILU: case GGML_UNARY_OP_GELU: + case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_RELU: case GGML_UNARY_OP_TANH: @@ -9152,6 +9487,19 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * return false; } break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(tensor)) { + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + buf = tensor->buffer; + break; + default: + return false; + } + break; case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT_ID: case GGML_OP_FLASH_ATTN_EXT: @@ -9178,7 +9526,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * // Only run if ctx hasn't been submitted yet if (!subctx->seqs.empty()) { #ifdef GGML_VULKAN_CHECK_RESULTS - ggml_vk_check_results_0(tensor); + ggml_vk_check_results_0(ctx, cgraph, tensor_idx); use_fence = true; #endif @@ -9198,7 +9546,7 @@ static bool 
ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_tensor * ggml_vk_wait_for_fence(ctx); } #ifdef GGML_VULKAN_CHECK_RESULTS - ggml_vk_check_results_1(tensor); + ggml_vk_check_results_1(ctx, cgraph, tensor_idx); #endif } @@ -9488,6 +9836,12 @@ static size_t ggml_backend_vk_host_buffer_type_get_alignment(ggml_backend_buffer UNUSED(buft); } +static size_t ggml_backend_vk_host_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { + return vk_instance.devices[0]->suballocation_block_size; + + UNUSED(buft); +} + // Should be changed to return device-specific host buffer type // but that probably requires changes in llama.cpp ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { @@ -9496,7 +9850,7 @@ ggml_backend_buffer_type_t ggml_backend_vk_host_buffer_type() { /* .get_name = */ ggml_backend_vk_host_buffer_type_name, /* .alloc_buffer = */ ggml_backend_vk_host_buffer_type_alloc_buffer, /* .get_alignment = */ ggml_backend_vk_host_buffer_type_get_alignment, - /* .get_max_size = */ NULL, // defaults to SIZE_MAX + /* .get_max_size = */ ggml_backend_vk_host_buffer_type_get_max_size, /* .get_alloc_size = */ ggml_backend_cpu_buffer_type()->iface.get_alloc_size, /* .is_host = */ ggml_backend_cpu_buffer_type()->iface.is_host, }, @@ -9639,16 +9993,59 @@ static bool ggml_vk_is_empty(ggml_tensor * node) { return ggml_is_empty(node) || node->op == GGML_OP_NONE || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE; } +static bool ggml_vk_can_fuse(const struct ggml_cgraph * cgraph, int node_idx, std::initializer_list ops) { + if (!ggml_can_fuse(cgraph, node_idx, ops)) { + return false; + } + + if (ops.size() == 2 && ops.begin()[0] == GGML_OP_RMS_NORM && ops.begin()[1] == GGML_OP_MUL) { + // additional constraints specific to this fusion + const ggml_tensor *rms_norm = cgraph->nodes[node_idx]; + const ggml_tensor *mul = cgraph->nodes[node_idx + 1]; + + GGML_ASSERT(rms_norm->src[0]->type == 
GGML_TYPE_F32); + GGML_ASSERT(rms_norm->type == GGML_TYPE_F32); + // rms_norm only supports f32 + if (mul->src[0]->type != GGML_TYPE_F32 || + mul->src[1]->type != GGML_TYPE_F32 || + mul->type != GGML_TYPE_F32) { + return false; + } + // if rms_norm is the B operand, then we don't handle broadcast + if (rms_norm == mul->src[1] && + mul->src[0]->ne[1] != rms_norm->ne[1]) { + return false; + } + // rms_norm shader assumes contiguous rows + if (!ggml_is_contiguous_rows(mul->src[0]) || !ggml_is_contiguous_rows(mul->src[1])) { + return false; + } + } + return true; +} + static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cgraph * cgraph) { VK_LOG_DEBUG("ggml_backend_vk_graph_compute(" << cgraph->n_nodes << " nodes)"); ggml_backend_vk_context * ctx = (ggml_backend_vk_context *)backend->context; + if (vk_instance.debug_utils_support) { + vk::DebugUtilsLabelEXT dul = {}; + dul.pLabelName = "ggml_backend_vk_graph_compute"; + dul.color = std::array{1.0f, 1.0f, 1.0f, 1.0f}; + vk_instance.pfn_vkQueueBeginDebugUtilsLabelEXT(ctx->device->compute_queue.queue, reinterpret_cast(&dul)); + } + uint64_t total_mat_mul_bytes = 0; for (int i = 0; i < cgraph->n_nodes; i++) { - ggml_vk_build_graph(ctx, cgraph->nodes[i], i, nullptr, 0, true, false, false, false); + if (!ctx->device->disable_fusion && ggml_vk_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL })) { + ctx->num_additional_fused_ops = 1; + } + ggml_vk_build_graph(ctx, cgraph, i, nullptr, 0, true, false, false, false); if (cgraph->nodes[i]->op == GGML_OP_MUL_MAT || cgraph->nodes[i]->op == GGML_OP_MUL_MAT_ID) { total_mat_mul_bytes += ggml_nbytes(cgraph->nodes[i]->src[0]); } + i += ctx->num_additional_fused_ops; + ctx->num_additional_fused_ops = 0; } if (ctx->device->need_compiles) { ggml_vk_load_shaders(ctx->device); @@ -9710,14 +10107,18 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg mul_mat_bytes += ggml_nbytes(cgraph->nodes[i]->src[0]); } + if 
(!ctx->device->disable_fusion && ggml_vk_can_fuse(cgraph, i, { GGML_OP_RMS_NORM, GGML_OP_MUL })) { + ctx->num_additional_fused_ops = 1; + } + // Signal the almost_ready fence when the graph is mostly complete (< 20% remaining) bool almost_ready = (cgraph->n_nodes - i) < cgraph->n_nodes / 5; bool submit = (submitted_nodes >= nodes_per_submit) || (mul_mat_bytes >= mul_mat_bytes_per_submit) || - (i == last_node) || + (i + ctx->num_additional_fused_ops == last_node) || (almost_ready && !ctx->almost_ready_fence_pending); - bool enqueued = ggml_vk_build_graph(ctx, cgraph->nodes[i], i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i == last_node, almost_ready, submit); + bool enqueued = ggml_vk_build_graph(ctx, cgraph, i, cgraph->nodes[submit_node_idx], submit_node_idx, false, i + ctx->num_additional_fused_ops == last_node, almost_ready, submit); if (vk_perf_logger_enabled) { if (ctx->compute_ctx.expired()) { @@ -9727,7 +10128,10 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg } else { compute_ctx = ctx->compute_ctx.lock(); } - compute_ctx->s->buffer.writeTimestamp(vk::PipelineStageFlagBits::eAllCommands, ctx->device->query_pool, i+1); + // If there are fused ops, just write out timestamps for all nodes to keep the accounting simple + for (int j = 0; j < ctx->num_additional_fused_ops + 1; ++j) { + compute_ctx->s->buffer.writeTimestamp(vk::PipelineStageFlagBits::eAllCommands, ctx->device->query_pool, i+j+1); + } } if (enqueued) { @@ -9749,6 +10153,8 @@ static ggml_status ggml_backend_vk_graph_compute(ggml_backend_t backend, ggml_cg } submit_count++; } + i += ctx->num_additional_fused_ops; + ctx->num_additional_fused_ops = 0; } if (vk_perf_logger_enabled) { @@ -9910,6 +10316,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm case GGML_OP_UNARY: switch (ggml_get_unary_op(op)) { case GGML_UNARY_OP_GELU: + case GGML_UNARY_OP_GELU_ERF: case GGML_UNARY_OP_GELU_QUICK: case GGML_UNARY_OP_SILU: case 
GGML_UNARY_OP_RELU: @@ -9923,15 +10330,32 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm return false; } break; + case GGML_OP_GLU: + switch (ggml_get_glu_op(op)) { + case GGML_GLU_OP_GEGLU: + case GGML_GLU_OP_REGLU: + case GGML_GLU_OP_SWIGLU: + case GGML_GLU_OP_GEGLU_ERF: + case GGML_GLU_OP_GEGLU_QUICK: + return ggml_is_contiguous(op->src[0]) && + (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_F16) && + (op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16) && + (op->src[0]->type == op->type); + default: + return false; + } + break; case GGML_OP_MUL_MAT: case GGML_OP_MUL_MAT_ID: { ggml_type src0_type = op->src[0]->type; ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; const vk_device& device = ggml_vk_get_device(ctx->device); - if (op->op == GGML_OP_MUL_MAT_ID && !device->mul_mat_id_s[src0_type] && !device->mul_mat_id_m[src0_type] && !device->mul_mat_id_l[src0_type]) { - // If there's not enough shared memory for row_ids and the result tile, fallback to CPU - return false; + if (op->op == GGML_OP_MUL_MAT_ID) { + if (!device->mul_mat_id_s[src0_type] && !device->mul_mat_id_m[src0_type] && !device->mul_mat_id_l[src0_type]) { + // If there's not enough shared memory for row_ids and the result tile, fallback to CPU + return false; + } } switch (src0_type) { case GGML_TYPE_F32: @@ -9989,19 +10413,8 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm ggml_backend_vk_device_context * ctx = (ggml_backend_vk_device_context *)dev->context; auto device = ggml_vk_get_device(ctx->device); bool coopmat2 = device->coopmat2; - switch (op->src[0]->ne[0]) { - case 64: - case 80: - case 96: - case 112: - case 128: - case 256: - break; - default: - return false; - } - if (op->src[1]->ne[0] != op->src[2]->ne[0]) { - // different head sizes of K and V are not supported yet + FaHeadSizes head_sizes = fa_get_head_sizes(op->src[1]->ne[0], op->src[2]->ne[0]); + 
if (head_sizes == FA_HEAD_SIZE_UNSUPPORTED) { return false; } if (op->src[0]->type != GGML_TYPE_F32) { @@ -10081,6 +10494,23 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm return false; } } break; + case GGML_OP_SET_ROWS: + { + switch (op->type) { + case GGML_TYPE_F32: + case GGML_TYPE_F16: + case GGML_TYPE_BF16: + case GGML_TYPE_Q4_0: + case GGML_TYPE_Q4_1: + case GGML_TYPE_Q5_0: + case GGML_TYPE_Q5_1: + case GGML_TYPE_Q8_0: + case GGML_TYPE_IQ4_NL: + return true; + default: + return false; + } + } break; case GGML_OP_CONT: case GGML_OP_CPY: case GGML_OP_DUP: @@ -10165,11 +10595,11 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm case GGML_OP_CLAMP: return op->src[0]->type == GGML_TYPE_F32; case GGML_OP_UPSCALE: - return op->op_params[0] == GGML_SCALE_MODE_NEAREST; case GGML_OP_ACC: case GGML_OP_CONCAT: case GGML_OP_SCALE: case GGML_OP_PAD: + case GGML_OP_ROLL: case GGML_OP_DIAG_MASK_INF: case GGML_OP_SOFT_MAX: case GGML_OP_SOFT_MAX_BACK: @@ -10332,6 +10762,22 @@ static bool ggml_vk_instance_portability_enumeration_ext_available(const std::ve UNUSED(instance_extensions); } +// Extension availability +static bool ggml_vk_instance_debug_utils_ext_available( + const std::vector & instance_extensions) { + // Check for the VK_EXT_debug_utils instance extension + for (const auto & properties : instance_extensions) { + if (strcmp("VK_EXT_debug_utils", properties.extensionName) == 0) { + return true; + } + } + + std::cerr << "ggml_vulkan: WARNING: Instance extension VK_EXT_debug_utils not found." 
<< std::endl; + return false; + + UNUSED(instance_extensions); +} + static bool ggml_vk_khr_cooperative_matrix_support(const vk::PhysicalDeviceProperties& props, const vk::PhysicalDeviceDriverProperties& driver_props, vk_device_architecture arch) { switch (props.vendorID) { case VK_VENDOR_ID_INTEL: @@ -10444,11 +10890,21 @@ void * comp_result; size_t comp_size; size_t comp_nb[GGML_MAX_DIMS]; size_t check_counter = 0; -static void ggml_vk_check_results_0(ggml_tensor * tensor) { +static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph * cgraph, int tensor_idx) { + ggml_tensor * tensor = cgraph->nodes[tensor_idx]; if (tensor->op == GGML_OP_TRANSPOSE) { return; } + bool fused_rms_norm_mul = false; + int rms_norm_idx = -1; + if (ctx->num_additional_fused_ops == 1 && + tensor->op == GGML_OP_RMS_NORM && + cgraph->nodes[tensor_idx + 1]->op == GGML_OP_MUL) { + fused_rms_norm_mul = true; + tensor = cgraph->nodes[tensor_idx + 1]; + } + check_counter++; if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) { return; @@ -10476,6 +10932,15 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) { for (int i = 0; i < 6; i++) { ggml_tensor * srci = tensor->src[i]; + if (fused_rms_norm_mul) { + rms_norm_idx = tensor->src[0]->op == GGML_OP_RMS_NORM ? 
0 : 1; + ggml_tensor *rms_norm = tensor->src[rms_norm_idx]; + switch (i) { + case 0: srci = rms_norm->src[0]; break; + case 1: srci = tensor->src[1 - rms_norm_idx]; break; + default: continue; + } + } if (srci == nullptr) { continue; } @@ -10533,7 +10998,12 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) { } else if (tensor->op == GGML_OP_SUB) { tensor_clone = ggml_sub(ggml_ctx, src_clone[0], src_clone[1]); } else if (tensor->op == GGML_OP_MUL) { - tensor_clone = ggml_mul(ggml_ctx, src_clone[0], src_clone[1]); + if (fused_rms_norm_mul) { + tensor_clone = ggml_rms_norm(ggml_ctx, src_clone[0], *(float *)tensor->src[rms_norm_idx]->op_params); + tensor_clone = ggml_mul(ggml_ctx, tensor_clone, src_clone[1 - rms_norm_idx]); + } else { + tensor_clone = ggml_mul(ggml_ctx, src_clone[0], src_clone[1]); + } } else if (tensor->op == GGML_OP_DIV) { tensor_clone = ggml_div(ggml_ctx, src_clone[0], src_clone[1]); } else if (tensor->op == GGML_OP_CONCAT) { @@ -10621,6 +11091,9 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) { case GGML_UNARY_OP_GELU: tensor_clone = ggml_gelu(ggml_ctx, src_clone[0]); break; + case GGML_UNARY_OP_GELU_ERF: + tensor_clone = ggml_gelu_erf(ggml_ctx, src_clone[0]); + break; case GGML_UNARY_OP_GELU_QUICK: tensor_clone = ggml_gelu_quick(ggml_ctx, src_clone[0]); break; @@ -10637,6 +11110,12 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) { std::cerr << "Missing vk_check_results OP: " << ggml_op_name(tensor->op) << std::endl; GGML_ABORT("fatal error"); } + } else if (tensor->op == GGML_OP_GLU) { + if (src_clone[1] == nullptr) { + tensor_clone = ggml_glu(ggml_ctx, src_clone[0], (ggml_glu_op) tensor->op_params[0], tensor->op_params[1]); + } else { + tensor_clone = ggml_glu_split(ggml_ctx, src_clone[0], src_clone[1], (ggml_glu_op) tensor->op_params[0]); + } } else if (tensor->op == GGML_OP_CPY || tensor->op == GGML_OP_DUP) { if (src1 == nullptr) { tensor_clone = ggml_dup(ggml_ctx, src_clone[0]); @@ -10644,6 +11123,8 @@ 
static void ggml_vk_check_results_0(ggml_tensor * tensor) { } else { tensor_clone = ggml_cpy(ggml_ctx, src_clone[0], src_clone[1]); } + } else if (tensor->op == GGML_OP_SET_ROWS) { + tensor_clone = ggml_set_rows(ggml_ctx, src_clone[0], src_clone[1]); } else if (tensor->op == GGML_OP_CONT) { tensor_clone = ggml_cont_4d(ggml_ctx, src_clone[0], tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]); } else if (tensor->op == GGML_OP_RESHAPE) { @@ -10715,10 +11196,10 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) { GGML_ABORT("fatal error"); } - ggml_cgraph * cgraph = ggml_new_graph(ggml_ctx); - ggml_build_forward_expand(cgraph, tensor_clone); + ggml_cgraph * cgraph_cpu = ggml_new_graph(ggml_ctx); + ggml_build_forward_expand(cgraph_cpu, tensor_clone); - ggml_graph_compute_with_ctx(ggml_ctx, cgraph, 8); + ggml_graph_compute_with_ctx(ggml_ctx, cgraph_cpu, 8); if (vk_output_tensor > 0 && vk_output_tensor == check_counter) { ggml_vk_print_tensor(tensor_clone, "tensor_clone"); @@ -10741,10 +11222,19 @@ static void ggml_vk_check_results_0(ggml_tensor * tensor) { VK_LOG_DEBUG("END ggml_vk_check_results_0(" << tensor->name << ")"); } -static void ggml_vk_check_results_1(ggml_tensor * tensor) { +static void ggml_vk_check_results_1(ggml_backend_vk_context * ctx, ggml_cgraph * cgraph, int tensor_idx) { + ggml_tensor * tensor = cgraph->nodes[tensor_idx]; if (tensor->op == GGML_OP_TRANSPOSE) { return; } + bool fused_rms_norm_mul = false; + if (ctx->num_additional_fused_ops == 1 && + tensor->op == GGML_OP_RMS_NORM && + cgraph->nodes[tensor_idx + 1]->op == GGML_OP_MUL) { + fused_rms_norm_mul = true; + tensor = cgraph->nodes[tensor_idx + 1]; + } + if (!(vk_output_tensor > 0 && vk_output_tensor == check_counter) && check_counter <= vk_skip_checks) { return; } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt b/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt index e60e9d1e5b5c5..e1f613fb4f683 100644 --- 
a/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt +++ b/ggml/src/ggml-vulkan/vulkan-shaders/CMakeLists.txt @@ -19,21 +19,13 @@ if (GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT) add_compile_definitions(GGML_VULKAN_BFLOAT16_GLSLC_SUPPORT) message(STATUS "Enabling bfloat16 glslc support") endif() +if (GGML_VULKAN_SHADER_DEBUG_INFO) + add_compile_definitions(GGML_VULKAN_SHADER_DEBUG_INFO) + message(STATUS "Enabling shader debug info") +endif() set(TARGET vulkan-shaders-gen) add_executable(${TARGET} vulkan-shaders-gen.cpp) install(TARGETS ${TARGET} RUNTIME) target_compile_features(${TARGET} PRIVATE cxx_std_17) target_link_libraries(vulkan-shaders-gen PUBLIC Threads::Threads) - -# Configure output directories for MSVC builds -if(MSVC) - # Get the main project's runtime output directory if possible - if(DEFINED CMAKE_RUNTIME_OUTPUT_DIRECTORY) - foreach(CONFIG ${CMAKE_CONFIGURATION_TYPES}) - string(TOUPPER ${CONFIG} CONFIG) - set_target_properties(${TARGET} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY_${CONFIG} ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}) - endforeach() - endif() -endif() diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp b/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp index 9c76437d9b0b9..27d6b7464f62c 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/copy_to_quant.comp @@ -1,22 +1,26 @@ #version 450 -#if RTE16 -#extension GL_EXT_spirv_intrinsics : enable -spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits -#endif // RTE16 - +#include "rte.comp" #include "types.comp" -#include "generic_unary_head.comp" -#if defined(DATA_A_IQ4_NL) -// 16 invocations needed for init_iq4nl_shmem -layout(local_size_x = 16, local_size_y = 1, local_size_z = 1) in; +#if defined(SET_ROWS) && QUANT_K == 1 +layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; +const uint BLOCK_SIZE = 512; #else -layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in; 
+layout(local_size_x = 32, local_size_y = 1, local_size_z = 1) in; +const uint BLOCK_SIZE = 32; #endif layout (binding = 0) readonly buffer S {float data_s[];}; + +#if defined(SET_ROWS) +#include "generic_binary_head.comp" +layout (binding = 1) readonly buffer C {uvec2 data_i[];}; +layout (binding = 2) writeonly buffer Q {A_TYPE data_q[];}; +#else +#include "generic_unary_head.comp" layout (binding = 1) writeonly buffer Q {A_TYPE data_q[];}; +#endif #if defined(DATA_A_Q4_0) void quantize(uint dst_idx, uint src_idx) @@ -221,15 +225,56 @@ void quantize(uint dst_idx, uint src_idx) } #endif +#if defined(DATA_A_F32) || defined(DATA_A_F16) +void quantize(uint dst_idx, uint src_idx) +{ + data_q[dst_idx] = A_TYPE(data_s[src_idx]); +} +#endif + +#if defined(DATA_A_BF16) +void quantize(uint dst_idx, uint src_idx) +{ + data_q[dst_idx] = A_TYPE(fp32_to_bf16(data_s[src_idx])); +} +#endif + +#if defined(SET_ROWS) + void main() { #ifdef NEEDS_INIT_IQ_SHMEM init_iq_shmem(gl_WorkGroupSize); - if (gl_LocalInvocationIndex.x != 0) { +#endif + + const uint idx = ((gl_WorkGroupID.z * 262144 + gl_WorkGroupID.y * 512 + gl_WorkGroupID.x) * BLOCK_SIZE + gl_LocalInvocationID.x) * QUANT_K; + + if (idx >= p.ne) { return; } + + uint i00, i01, i02, i03; + get_indices(idx, i00, i01, i02, i03); + + uint i12 = fastmod(i03, p.ne12); + uint i11 = fastmod(i02, p.ne11); + uint i10 = i01; + + uint i1 = data_i[src1_idx(i10, i11, i12, 0) + get_boffset()].x; + + uint src0_idx = src0_idx(i00, i01, i02, i03) + get_aoffset(); + uint dst_idx = dst_idx(i00 / QUANT_K, i1, i02, i03) + get_doffset(); + + quantize(dst_idx, src0_idx); +} + +#else + +void main() { +#ifdef NEEDS_INIT_IQ_SHMEM + init_iq_shmem(gl_WorkGroupSize); #endif - const uint idx = gl_WorkGroupID.z * 262144 + gl_WorkGroupID.y * 512 + gl_WorkGroupID.x * QUANT_K; + const uint idx = (gl_WorkGroupID.z * 262144 + gl_WorkGroupID.y * 512 + gl_WorkGroupID.x * 32 + gl_LocalInvocationID.x) * QUANT_K; if (idx >= p.ne) { return; @@ -240,3 +285,5 @@ void 
main() { quantize(dst_idx, src_idx); } + +#endif diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp index 157154af3a328..d4e4e6bae63df 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q2_k.comp @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];}; void main() { [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) { const uint i = gl_WorkGroupID.x * 256 + wgy; - if (i >= p.M * p.K / QUANT_K) { + if (i >= p.nel / QUANT_K) { return; } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp index c17dd0d999116..3661f771c745f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q3_k.comp @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];}; void main() { [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) { const uint i = uint(gl_WorkGroupID.x * 256 + wgy); - if (i >= p.M * p.K / QUANT_K) { + if (i >= p.nel / QUANT_K) { return; } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp index 987f113a35ad0..1370db3654dd7 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q4_k.comp @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];}; void main() { [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) { const uint ib = gl_WorkGroupID.x * 256 + wgy; - if (ib >= p.M * p.K / QUANT_K) { + if (ib >= p.nel / QUANT_K) { return; } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp index 6db5403b6613e..3f3b839e11832 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q5_k.comp @@ -10,7 +10,7 @@ 
layout (binding = 1) writeonly buffer D {D_TYPE data_b[];}; void main() { [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) { const uint ib = gl_WorkGroupID.x * 256 + wgy; - if (ib >= p.M * p.K / QUANT_K) { + if (ib >= p.nel / QUANT_K) { return; } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp index 0b91317550f97..9cf34256e8c80 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/dequant_q6_k.comp @@ -10,7 +10,7 @@ layout (binding = 1) writeonly buffer D {D_TYPE data_b[];}; void main() { [[unroll]] for (uint wgy = 0; wgy < 256; wgy++) { const uint i = gl_WorkGroupID.x * 256 + wgy; - if (i >= p.M * p.K / QUANT_K) { + if (i >= p.nel / QUANT_K) { return; } const uint tid = gl_LocalInvocationID.x; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp index ce230a8f7d910..45c6e7736ace6 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn.comp @@ -11,7 +11,8 @@ #include "types.comp" #include "flash_attn_base.comp" -const uint32_t D_per_thread = D / D_split; +const uint32_t HSK_per_thread = HSK / D_split; +const uint32_t HSV_per_thread = HSV / D_split; const uint32_t cols_per_iter = WorkGroupSize / D_split; const uint32_t cols_per_thread = Bc / cols_per_iter; @@ -29,7 +30,7 @@ layout (binding = 3) readonly buffer M {float16_t data_m[];}; // Rows index by Q's dimension 2, and the first N rows are valid. 
D_TYPE perElemOpGqaStore(const in uint32_t r, const in uint32_t c, const in D_TYPE elem, const in uint32_t o_offset, const in uint32_t iq2, const in uint32_t N) { - uint32_t offset = (iq2 + r) * D + c; + uint32_t offset = (iq2 + r) * HSV + c; data_o[o_offset + offset] = D_TYPE(elem); return elem; } @@ -38,7 +39,7 @@ shared FLOAT_TYPE tmpsh[WorkGroupSize]; shared vec4 tmpshv4[WorkGroupSize]; shared float masksh[Bc][Br]; -shared vec4 Qf[Br][D / 4]; +shared vec4 Qf[Br][HSK / 4]; void main() { #ifdef NEEDS_INIT_IQ_SHMEM @@ -53,18 +54,18 @@ void main() { uint32_t q_offset = (iq2*p.nb02+iq3*p.nb03) / 4; - [[unroll]] for (uint32_t idx = 0; idx < Br * D / 4; idx += gl_WorkGroupSize.x) { - uint32_t d = (idx + tid) % (D / 4); - uint32_t r = (idx + tid) / (D / 4); - if (r < Br && d < D / 4 && + [[unroll]] for (uint32_t idx = 0; idx < Br * HSK / 4; idx += gl_WorkGroupSize.x) { + uint32_t d = (idx + tid) % (HSK / 4); + uint32_t r = (idx + tid) / (HSK / 4); + if (r < Br && d < HSK / 4 && i * Br + r < N) { Qf[r][d] = vec4(data_qv4[q_offset / 4 + (i * Br + r) * q_stride / 4 + d]) * p.scale; } } barrier(); - vec4 Of[Br][D_per_thread / 4]; - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + vec4 Of[Br][HSV_per_thread / 4]; + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t r = 0; r < Br; ++r) { Of[r][d] = vec4(0.0); } @@ -99,6 +100,10 @@ void main() { uint32_t k_offset = (ik2*p.nb12 + ik3*p.nb13) / 2; uint32_t v_offset = (iv2*p.nb22 + iv3*p.nb23) / 2; #endif + uint32_t m_offset = 0; + if (p.nem2 != 1 || p.nem3 != 1) { + m_offset = ((iq3 % p.nem3) * p.nem2 + (iq2 % p.nem2)) * p.nem1 * KV; + } [[dont_unroll]] for (uint32_t j = start_j; j < end_j; ++j) { @@ -112,7 +117,7 @@ void main() { [[unroll]] for (uint32_t c = 0; c < cols_per_thread; ++c) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSK_per_thread / 4; ++d) { #if BLOCK_SIZE > 1 uint coord = (j * Bc + c * cols_per_iter + 
col_tid) * k_stride * BLOCK_SIZE + 4 * (d * D_split + d_tid); uint ib = coord / BLOCK_SIZE; @@ -144,13 +149,13 @@ void main() { } } - if (p.mask != 0) { + if ((p.mask_n_head_log2 & MASK_ENABLE_BIT) != 0) { [[unroll]] for (uint32_t idx = 0; idx < Bc * Br; idx += gl_WorkGroupSize.x) { uint32_t c = (idx + tid) % Bc; uint32_t r = (idx + tid) / Bc; if (idx + tid < Bc * Br) { - masksh[c][r] = float(data_m[(i * Br + r) * m_stride + (j * Bc + c)]); + masksh[c][r] = float(data_m[m_offset + (i * Br + r) * m_stride + (j * Bc + c)]); } } barrier(); @@ -191,14 +196,14 @@ void main() { Lf[r] = eMf[r]*Lf[r] + rowsumf[r]; } - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t r = 0; r < Br; ++r) { Of[r][d] = eMf[r] * Of[r][d]; } } [[unroll]] for (uint32_t c = 0; c < cols_per_thread; ++c) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { #if BLOCK_SIZE > 1 uint coord = (j * Bc + c * cols_per_iter + col_tid) * v_stride * BLOCK_SIZE + 4 * (d * D_split + d_tid); uint ib = coord / BLOCK_SIZE; @@ -255,7 +260,7 @@ void main() { Lf[r] = tmpsh[d_tid]; barrier(); - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { Of[r][d] = eMf * Of[r][d]; tmpshv4[tid] = Of[r][d]; @@ -277,11 +282,11 @@ void main() { // If there is split_k, then the split_k resolve shader does the final // division by L. Store the intermediate O value and per-row m and L values. 
if (p.k_num > 1) { - uint32_t o_offset = D * p.ne1 * split_k_index; + uint32_t o_offset = HSV * p.ne1 * (split_k_index + iq3 * p.k_num); [[unroll]] for (uint32_t r = 0; r < Br; ++r) { if (r < N) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t comp = 0; comp < 4; ++comp) { perElemOpGqaStore(r, 4*(d * D_split + d_tid) + comp, Of[r][d][comp], o_offset, iq2, N); } @@ -289,7 +294,7 @@ void main() { } } - o_offset = D * p.ne1 * p.k_num + p.ne1 * split_k_index * 2; + o_offset = HSV * p.ne1 * p.ne3 * p.k_num + p.ne1 * (split_k_index + iq3 * p.k_num) * 2; [[unroll]] for (uint32_t r = 0; r < Br; ++r) { if (r < N) { perElemOpStoreCol0(r, 0u, ACC_TYPE(Lf[r]), o_offset, iq2, N); @@ -305,18 +310,18 @@ void main() { Lfrcp[r] = 1.0 / Lf[r]; } - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t r = 0; r < Br; ++r) { Of[r][d] *= Lfrcp[r]; } } - uint32_t o_offset = iq3*p.ne2*p.ne1; + uint32_t o_offset = iq3*p.ne2*p.ne1*HSV; if (p.gqa_ratio > 1) { [[unroll]] for (uint32_t r = 0; r < Br; ++r) { if (r < N) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t comp = 0; comp < 4; ++comp) { perElemOpGqaStore(r, 4*(d * D_split + d_tid) + comp, Of[r][d][comp], o_offset, iq2, N); } @@ -326,9 +331,9 @@ void main() { } else { [[unroll]] for (uint32_t r = 0; r < Br; ++r) { if (i * Br + r < N) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t comp = 0; comp < 4; ++comp) { - data_o[o_offset + iq2 * D + (i * Br + r) * p.ne1 * D + 4*(d * D_split + d_tid) + comp] = D_TYPE(Of[r][d][comp]); + data_o[o_offset + iq2 * HSV + (i * Br + r) * p.ne1 * HSV + 4*(d * D_split + d_tid) + comp] = 
D_TYPE(Of[r][d][comp]); } } } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp index 61d90e2d8ed21..7defe72b403b5 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_base.comp @@ -4,10 +4,10 @@ layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; layout (constant_id = 0) const uint32_t WorkGroupSize = 128; layout (constant_id = 1) const uint32_t Br = 1; layout (constant_id = 2) const uint32_t Bc = 32; -layout (constant_id = 3) const uint32_t D = 32; -layout (constant_id = 4) const uint32_t Clamp = 0; -layout (constant_id = 5) const uint32_t D_split = 16; - +layout (constant_id = 3) const uint32_t HSK = 32; +layout (constant_id = 4) const uint32_t HSV = 32; +layout (constant_id = 5) const uint32_t Clamp = 0; +layout (constant_id = 6) const uint32_t D_split = 16; layout (push_constant) uniform parameter { uint32_t N; @@ -24,6 +24,8 @@ layout (push_constant) uniform parameter { uint32_t nev2; uint32_t nev3; uint32_t nem1; + uint32_t nem2; + uint32_t nem3; uint32_t nb01; uint32_t nb02; @@ -34,14 +36,12 @@ layout (push_constant) uniform parameter { uint32_t nb21; uint32_t nb22; uint32_t nb23; - uint32_t nb31; float scale; float max_bias; float logit_softcap; - uint32_t mask; - uint32_t n_head_log2; + uint32_t mask_n_head_log2; float m0; float m1; @@ -50,6 +50,9 @@ layout (push_constant) uniform parameter { uint32_t k_num; } p; +#define MASK_ENABLE_BIT (1<<16) +#define N_LOG2_MASK 0xFFFF + layout (binding = 4) writeonly buffer O {D_TYPE data_o[];}; #if defined(A_TYPE_PACKED16) @@ -100,8 +103,10 @@ ACC_TYPE perElemOpComputeSlope(const in uint32_t r, const in uint32_t c, const i { const uint32_t h = iq2 + (r % p.gqa_ratio); - const ACC_TYPE base = ACC_TYPE(h < p.n_head_log2 ? p.m0 : p.m1); - const int exph = int(h < p.n_head_log2 ? 
h + 1 : 2*(h - p.n_head_log2) + 1); + uint32_t n_head_log2 = p.mask_n_head_log2 & N_LOG2_MASK; + + const ACC_TYPE base = ACC_TYPE(h < n_head_log2 ? p.m0 : p.m1); + const int exph = int(h < n_head_log2 ? h + 1 : 2*(h - n_head_log2) + 1); return ACC_TYPE(pow(base, ACC_TYPE(exph))); } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp index da478be24fb6e..486735fe8b0c9 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm1.comp @@ -13,7 +13,9 @@ #include "types.comp" #include "flash_attn_base.comp" -const uint32_t D_per_thread = D / D_split; +const uint32_t HSK_per_thread = HSK / D_split; +const uint32_t HSV_per_thread = HSV / D_split; + const uint32_t row_split = 4; const uint32_t rows_per_thread = Br / row_split; const uint32_t cols_per_iter = gl_WorkGroupSize.x / D_split / row_split; @@ -32,7 +34,7 @@ layout (binding = 3) readonly buffer M {float16_t data_m[];}; // Rows index by Q's dimension 2, and the first N rows are valid. D_TYPE perElemOpGqaStore(const in uint32_t r, const in uint32_t c, const in D_TYPE elem, const in uint32_t o_offset, const in uint32_t iq2, const in uint32_t N) { - uint32_t offset = (iq2 + r) * D + c; + uint32_t offset = (iq2 + r) * HSV + c; data_o[o_offset + offset] = D_TYPE(elem); return elem; } @@ -44,14 +46,14 @@ const uint32_t MatBc = 16; shared FLOAT_TYPE tmpsh[gl_WorkGroupSize.x]; shared ACC_TYPEV4 tmpshv4[gl_WorkGroupSize.x]; -const uint32_t qstride = D / 4 + 2; // in units of f16vec4 +const uint32_t qstride = HSK / 4 + 2; // in units of f16vec4 shared f16vec4 Qf[Br * qstride]; -// Avoid padding for D==256 to make it fit in 48KB shmem. -const uint32_t sfshstride = (D <= 128) ? (Br + 8) : Br; +// Avoid padding for hsk==256 to make it fit in 48KB shmem. +const uint32_t sfshstride = (HSK <= 128) ? 
(Br + 8) : Br; shared ACC_TYPE sfsh[Bc * sfshstride]; -const uint32_t kshstride = D / 4 + 2; // in units of f16vec4 +const uint32_t kshstride = HSK / 4 + 2; // in units of f16vec4 shared f16vec4 ksh[Bc * kshstride]; shared float slope[Br]; @@ -74,18 +76,18 @@ void main() { uint32_t q_offset = (iq2*p.nb02+iq3*p.nb03) / 4; - [[unroll]] for (uint32_t idx = 0; idx < Br * D / 4; idx += gl_WorkGroupSize.x) { - uint32_t d = (idx + tid) % (D / 4); - uint32_t r = (idx + tid) / (D / 4); - if (r < Br && d < D / 4 && + [[unroll]] for (uint32_t idx = 0; idx < Br * HSK / 4; idx += gl_WorkGroupSize.x) { + uint32_t d = (idx + tid) % (HSK / 4); + uint32_t r = (idx + tid) / (HSK / 4); + if (r < Br && d < HSK / 4 && i * Br + r < N) { Qf[r * qstride + d] = f16vec4(data_qv4[q_offset / 4 + (i * Br + r) * q_stride / 4 + d] * p.scale); } } barrier(); - ACC_TYPEV4 Of[rows_per_thread][D_per_thread / 4]; - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + ACC_TYPEV4 Of[rows_per_thread][HSV_per_thread / 4]; + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { Of[r][d] = ACC_TYPEV4(0.0); } @@ -123,14 +125,18 @@ void main() { uint32_t k_offset = (ik2*p.nb12 + ik3*p.nb13) / 2; uint32_t v_offset = (iv2*p.nb22 + iv3*p.nb23) / 2; #endif + uint32_t m_offset = 0; + if (p.nem2 != 1 || p.nem3 != 1) { + m_offset = ((iq3 % p.nem3) * p.nem2 + (iq2 % p.nem2)) * p.nem1 * KV; + } [[dont_unroll]] for (uint32_t j = start_j; j < end_j; ++j) { - [[unroll]] for (uint32_t idx = 0; idx < Bc * D / 4; idx += gl_WorkGroupSize.x) { - uint32_t d = (idx + tid) % (D / 4); - uint32_t c = (idx + tid) / (D / 4); - if (c < Bc && d < D / 4) { + [[unroll]] for (uint32_t idx = 0; idx < Bc * HSK / 4; idx += gl_WorkGroupSize.x) { + uint32_t d = (idx + tid) % (HSK / 4); + uint32_t c = (idx + tid) / (HSK / 4); + if (c < Bc && d < HSK / 4) { #if BLOCK_SIZE > 1 uint coord = (j * Bc + c) * k_stride * BLOCK_SIZE + 4 * d; uint ib = coord / BLOCK_SIZE; @@ 
-145,14 +151,14 @@ void main() { } barrier(); - // K * Q^T -> S^T: Bc x D * D x Br -> Bc x Br - // Bc split across workgroup (four subgroups), loop over D in chunks of 16: 16 x 16 * 16 x 16 -> 16 x 16 + // K * Q^T -> S^T: Bc x HSK * HSK x Br -> Bc x Br + // Bc split across workgroup (four subgroups), loop over HSK in chunks of 16: 16 x 16 * 16 x 16 -> 16 x 16 // This is written transposed in order to allow for N being 8 if implementations need it coopmat SfMat = coopmat(0); coopmat KMat; coopmat QMat; - for (uint32_t d = 0; d < D / 16; ++d) { + for (uint32_t d = 0; d < HSK / 16; ++d) { coopMatLoad(QMat, Qf, d * 16 / 4, qstride, gl_CooperativeMatrixLayoutColumnMajor); uint coord = (gl_SubgroupID * MatBc) * kshstride + d * 16 / 4; @@ -176,12 +182,12 @@ void main() { barrier(); } - if (p.mask != 0) { + if ((p.mask_n_head_log2 & MASK_ENABLE_BIT) != 0) { [[unroll]] for (uint32_t idx = 0; idx < Bc * Br; idx += gl_WorkGroupSize.x) { uint32_t c = (idx + tid) % Bc; uint32_t r = (idx + tid) / Bc; if (idx + tid < Bc * Br || idx + gl_WorkGroupSize.x <= Bc * Br) { - sfsh[c * sfshstride + r] += ACC_TYPE(slope[r] * float(data_m[(i * Br + r) * m_stride + (j * Bc + c)])); + sfsh[c * sfshstride + r] += ACC_TYPE(slope[r] * float(data_m[m_offset + (i * Br + r) * m_stride + (j * Bc + c)])); } } barrier(); @@ -202,7 +208,7 @@ void main() { eMf[r] = exp(Moldf - Mf[r]); } - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { Of[r][d] = float16_t(eMf[r]) * Of[r][d]; } @@ -217,7 +223,7 @@ void main() { Pf[r] = exp(sfsh[tile_row(r) + (c * cols_per_iter + col_tid) * sfshstride] - Mf[r]); Lf[r] += Pf[r]; } - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { #if BLOCK_SIZE > 1 uint coord = (j * Bc + c * cols_per_iter + col_tid) * v_stride * BLOCK_SIZE + 4 * (d * D_split + d_tid); uint ib 
= coord / BLOCK_SIZE; @@ -280,7 +286,7 @@ void main() { } [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { Of[r][d] = float16_t(eMf[r]) * Of[r][d]; tmpshv4[tid] = Of[r][d]; @@ -300,11 +306,11 @@ void main() { // If there is split_k, then the split_k resolve shader does the final // division by L. Store the intermediate O value and per-row m and L values. if (p.k_num > 1) { - uint32_t o_offset = D * p.ne1 * split_k_index; + uint32_t o_offset = HSV * p.ne1 * (split_k_index + iq3 * p.k_num); [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { if (tile_row(r) < N) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t comp = 0; comp < 4; ++comp) { perElemOpGqaStore(tile_row(r), 4*(d * D_split + d_tid) + comp, float(Of[r][d][comp]), o_offset, iq2, N); } @@ -312,7 +318,7 @@ void main() { } } - o_offset = D * p.ne1 * p.k_num + p.ne1 * split_k_index * 2; + o_offset = HSV * p.ne1 * p.ne3 * p.k_num + p.ne1 * (split_k_index + iq3 * p.k_num) * 2; [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { if (tile_row(r) < N) { perElemOpStoreCol0(tile_row(r), 0u, ACC_TYPE(Lf[r]), o_offset, iq2, N); @@ -328,18 +334,18 @@ void main() { Lfrcp[r] = 1.0 / Lf[r]; } - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { Of[r][d] *= float16_t(Lfrcp[r]); } } - uint32_t o_offset = iq3*p.ne2*p.ne1; + uint32_t o_offset = iq3*p.ne2*p.ne1*HSV; if (p.gqa_ratio > 1) { [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { if (tile_row(r) < N) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t comp = 0; comp < 4; 
++comp) { perElemOpGqaStore(tile_row(r), 4*(d * D_split + d_tid) + comp, float(Of[r][d][comp]), o_offset, iq2, N); } @@ -349,9 +355,9 @@ void main() { } else { [[unroll]] for (uint32_t r = 0; r < rows_per_thread; ++r) { if (i * Br + tile_row(r) < N) { - [[unroll]] for (uint32_t d = 0; d < D_per_thread / 4; ++d) { + [[unroll]] for (uint32_t d = 0; d < HSV_per_thread / 4; ++d) { [[unroll]] for (uint32_t comp = 0; comp < 4; ++comp) { - data_o[o_offset + iq2 * D + (i * Br + tile_row(r)) * p.ne1 * D + 4*(d * D_split + d_tid) + comp] = D_TYPE(Of[r][d][comp]); + data_o[o_offset + iq2 * HSV + (i * Br + tile_row(r)) * p.ne1 * HSV + 4*(d * D_split + d_tid) + comp] = D_TYPE(Of[r][d][comp]); } } } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp index 6acf67a03a463..274f48fcabdd0 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_cm2.comp @@ -61,8 +61,8 @@ ACC_TYPE Max(const in uint32_t row, const in uint32_t col, const in ACC_TYPE ele // Rows index by Q's dimension 2, and the first N rows are valid. 
D_TYPE perElemOpGqaStore(const in uint32_t r, const in uint32_t c, const in D_TYPE elem, const in uint32_t o_offset, const in uint32_t iq2, const in uint32_t N) { - if (r < N && c < D) { - uint32_t offset = (iq2 + r) * D + c; + if (r < N && c < HSV) { + uint32_t offset = (iq2 + r) * HSV + c; data_o[o_offset + offset] = D_TYPE(elem); } return elem; @@ -86,9 +86,9 @@ void main() { tensorLayoutV = setTensorLayoutBlockSizeNV(tensorLayoutV, 1, BLOCK_SIZE); #endif - tensorLayoutQ = setTensorLayoutDimensionNV(tensorLayoutQ, N, D); - tensorLayoutK = setTensorLayoutDimensionNV(tensorLayoutK, KV, D); - tensorLayoutV = setTensorLayoutDimensionNV(tensorLayoutV, KV, D); + tensorLayoutQ = setTensorLayoutDimensionNV(tensorLayoutQ, N, HSK); + tensorLayoutK = setTensorLayoutDimensionNV(tensorLayoutK, KV, HSK); + tensorLayoutV = setTensorLayoutDimensionNV(tensorLayoutV, KV, HSV); // hint to the compiler that strides are aligned for the aligned variant of the shader if (Clamp != gl_CooperativeMatrixClampModeConstantNV) @@ -104,16 +104,16 @@ void main() { tensorLayoutK = setTensorLayoutStrideNV(tensorLayoutK, k_stride, 1); tensorLayoutV = setTensorLayoutStrideNV(tensorLayoutV, v_stride, 1); - coopmat Q; - coopmat Qf16; + coopmat Q; + coopmat Qf16; uint32_t q_offset = iq2*p.nb02+iq3*p.nb03; - coopMatLoadTensorNV(Q, data_q, q_offset, sliceTensorLayoutNV(tensorLayoutQ, i * Br, Br, 0, D)); + coopMatLoadTensorNV(Q, data_q, q_offset, sliceTensorLayoutNV(tensorLayoutQ, i * Br, Br, 0, HSK)); - Qf16 = coopmat(Q); + Qf16 = coopmat(Q); Qf16 *= float16_t(p.scale); - coopmat O = coopmat(0); + coopmat O = coopmat(0); coopmat L, M; @@ -130,15 +130,20 @@ void main() { coopMatPerElementNV(slopeMat, slopeMat, perElemOpComputeSlope, iq2); } + uint32_t m_offset = 0; + if (p.nem2 != 1 || p.nem3 != 1) { + m_offset = ((iq3 % p.nem3) * p.nem2 + (iq2 % p.nem2)) * p.nem1 * KV * 2 /*sizeof(float16_t)*/; + } + [[dont_unroll]] for (uint32_t j = start_j; j < end_j; ++j) { coopmat S = coopmat(0); - coopmat K_T; + 
coopmat K_T; uint32_t k_offset = ik2*p.nb12 + ik3*p.nb13; - coopMatLoadTensorNV(K_T, data_k, k_offset, sliceTensorLayoutNV(tensorLayoutK, j * Bc, Bc, 0, D), tensorViewTranspose DECODEFUNC); + coopMatLoadTensorNV(K_T, data_k, k_offset, sliceTensorLayoutNV(tensorLayoutK, j * Bc, Bc, 0, HSK), tensorViewTranspose DECODEFUNC); S = coopMatMulAdd(Qf16, K_T, S); if (p.logit_softcap != 0.0f) { @@ -148,14 +153,14 @@ void main() { } } - if (p.mask != 0) { + if ((p.mask_n_head_log2 & MASK_ENABLE_BIT) != 0) { tensorLayoutNV<2, Clamp> tensorLayoutM = createTensorLayoutNV(2, Clamp); tensorLayoutM = setTensorLayoutDimensionNV(tensorLayoutM, p.nem1, KV); tensorLayoutM = setTensorLayoutStrideNV(tensorLayoutM, m_stride, 1); coopmat mv; - coopMatLoadTensorNV(mv, data_m, 0, sliceTensorLayoutNV(tensorLayoutM, i * Br, Br, j * Bc, Bc)); + coopMatLoadTensorNV(mv, data_m, m_offset, sliceTensorLayoutNV(tensorLayoutM, i * Br, Br, j * Bc, Bc)); S += slopeMat*coopmat(mv); } @@ -203,42 +208,42 @@ void main() { rowsum = coopmat(0.0); rowsum = coopMatMulAdd(P_A, One, rowsum); - coopmat V; + coopmat V; uint32_t v_offset = iv2*p.nb22 + iv3*p.nb23; - coopMatLoadTensorNV(V, data_v, v_offset, sliceTensorLayoutNV(tensorLayoutV, j * Bc, Bc, 0, D) DECODEFUNC); + coopMatLoadTensorNV(V, data_v, v_offset, sliceTensorLayoutNV(tensorLayoutV, j * Bc, Bc, 0, HSV) DECODEFUNC); L = eM*L + rowsum; // This is the "diagonal" matrix in the paper, but since we do componentwise // multiply rather than matrix multiply it has the diagonal element smeared // across the row - coopmat eMdiag; + coopmat eMdiag; // resize eM by using smear/reduce coopMatReduceNV(eMdiag, eM, gl_CooperativeMatrixReduceRowNV, smearReduce); // multiply with fp16 accumulation, then add to O. - coopmat PV = coopmat(0); + coopmat PV = coopmat(0); PV = coopMatMulAdd(P_A, V, PV); - O = eMdiag * O + coopmat(PV); + O = eMdiag * O + coopmat(PV); } // If there is split_k, then the split_k resolve shader does the final // division by L. 
Store the intermediate O value and per-row m and L values. if (p.k_num > 1) { - coopmat O_D = coopmat(O); + coopmat O_D = coopmat(O); - uint32_t o_offset = D * p.ne1 * split_k_index; + uint32_t o_offset = HSV * p.ne1 * (split_k_index + iq3 * p.k_num); coopMatPerElementNV(O_D, O_D, perElemOpGqaStore, o_offset, iq2, N); - o_offset = D * p.ne1 * p.k_num + p.ne1 * split_k_index * 2; + o_offset = HSV * p.ne1 * p.ne3 * p.k_num + p.ne1 * (split_k_index + iq3 * p.k_num) * 2; coopMatPerElementNV(L, L, perElemOpStoreCol0, o_offset, iq2, N); coopMatPerElementNV(M, M, perElemOpStoreCol0, o_offset + p.ne1, iq2, N); return; } - coopmat Ldiag; + coopmat Ldiag; // resize L by using smear/reduce coopMatReduceNV(Ldiag, L, gl_CooperativeMatrixReduceRowNV, smearReduce); @@ -250,18 +255,18 @@ void main() { O = Ldiag*O; - uint32_t o_offset = iq3*p.ne2*p.ne1; + uint32_t o_offset = iq3*p.ne2*p.ne1*HSV; - coopmat O_D = coopmat(O); + coopmat O_D = coopmat(O); if (p.gqa_ratio > 1) { coopMatPerElementNV(O_D, O_D, perElemOpGqaStore, o_offset, iq2, N); } else { tensorLayoutNV<3, gl_CooperativeMatrixClampModeConstantNV> tensorLayoutD = createTensorLayoutNV(3, gl_CooperativeMatrixClampModeConstantNV); - tensorLayoutD = setTensorLayoutDimensionNV(tensorLayoutD, p.ne2, p.ne1, D); + tensorLayoutD = setTensorLayoutDimensionNV(tensorLayoutD, p.ne2, p.ne1, HSV); // permute dimensions tensorViewNV<3, false, 1, 0, 2> tensorViewPermute = createTensorViewNV(3, false, 1, 0, 2); - coopMatStoreTensorNV(O_D, data_o, o_offset, sliceTensorLayoutNV(tensorLayoutD, i * Br, Br, iq2, N, 0, D), tensorViewPermute); + coopMatStoreTensorNV(O_D, data_o, o_offset, sliceTensorLayoutNV(tensorLayoutD, i * Br, Br, iq2, N, 0, HSV), tensorViewPermute); } } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp index a7e3956854c44..0a17a9df23f9f 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp +++ 
b/ggml/src/ggml-vulkan/vulkan-shaders/flash_attn_split_k_reduce.comp @@ -2,9 +2,9 @@ #extension GL_EXT_control_flow_attributes : enable -#define BLOCK_SIZE 32 +layout(constant_id = 0) const uint BLOCK_SIZE = 32; -layout(local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in; +layout(local_size_x_id = 0, local_size_y = 1, local_size_z = 1) in; layout (binding = 0) readonly buffer A {float data_a[];}; layout (binding = 1) writeonly buffer D {float data_d[];}; @@ -12,48 +12,80 @@ layout (binding = 1) writeonly buffer D {float data_d[];}; layout (push_constant) uniform parameter { uint D; uint N; + uint ne3; uint k_num; } p; +shared float tmpsh[BLOCK_SIZE]; + void main() { // Each workgroup handles a row const uint n = gl_WorkGroupID.x; const uint tid = gl_LocalInvocationID.x; + const uint iq3 = gl_WorkGroupID.z; uint D = p.D; uint N = p.N; uint k_num = p.k_num; - uint l_offset = D * N * k_num + n; - uint m_offset = D * N * k_num + N + n; + uint l_offset = D * N * p.ne3 * k_num + N * iq3 * k_num * 2 + n; + uint m_offset = D * N * p.ne3 * k_num + N * iq3 * k_num * 2 + N + n; uint lm_stride = N * 2; // Compute the max m value for the row float m_max = -1.0/0.0; - [[unroll]] for (uint k = 0; k < k_num; ++k) { - float m = data_a[m_offset + k * lm_stride]; + for (uint k = 0; k + tid < k_num; k += BLOCK_SIZE) { + float m = data_a[m_offset + (k + tid) * lm_stride]; m_max = max(m_max, m); } + // reduce across the workgroup + tmpsh[tid] = m_max; + barrier(); + [[unroll]] for (uint s = BLOCK_SIZE/2; s > 0; s >>= 1) { + if (tid < s) { + m_max = max(m_max, tmpsh[tid + s]); + tmpsh[tid] = m_max; + } + barrier(); + } + m_max = tmpsh[0]; + + barrier(); + // Compute L based on m_max float L = 0; - [[unroll]] for (uint k = 0; k < k_num; ++k) { - float l = data_a[l_offset + k * lm_stride]; - float m = data_a[m_offset + k * lm_stride]; + for (uint k = 0; k + tid < k_num; k += BLOCK_SIZE) { + float l = data_a[l_offset + (k + tid) * lm_stride]; + float m = data_a[m_offset + (k + 
tid) * lm_stride]; L += exp(m - m_max) * l; } + // reduce across the workgroup + tmpsh[tid] = L; + barrier(); + [[unroll]] for (uint s = BLOCK_SIZE/2; s > 0; s >>= 1) { + if (tid < s) { + L += tmpsh[tid + s]; + tmpsh[tid] = L; + } + barrier(); + } + L = tmpsh[0]; + L = 1.0 / L; + // D dimension is split across workgroups in the y dimension + uint d = tid + gl_WorkGroupID.y * BLOCK_SIZE; // Scale and sum the O contributions based on m_max and store the result to memory - for (uint d = tid; d < D; d += BLOCK_SIZE) { + if (d < D) { float O = 0.0; [[unroll]] for (uint k = 0; k < k_num; ++k) { - uint o_offset = D * N * k + D * n + d; + uint o_offset = D * N * (k + iq3 * k_num) + D * n + d; float m = data_a[m_offset + k * lm_stride]; O += exp(m - m_max) * data_a[o_offset]; } O *= L; - data_d[D * n + d] = O; + data_d[iq3 * D * N + D * n + d] = O; } } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp new file mode 100644 index 0000000000000..f4268ed24f44c --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/geglu.comp @@ -0,0 +1,13 @@ +#version 450 + +#include "glu_head.comp" + +const float GELU_COEF_A = 0.044715f; +const float SQRT_2_OVER_PI = 0.79788456080286535587989211986876f; + +float op(float a, float b) { + const float val = SQRT_2_OVER_PI*a*(1.0f + GELU_COEF_A*a*a); + return 0.5f*a*(2.0f - 2.0f / (exp(2 * val) + 1)) * b; +} + +#include "glu_main.comp" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp new file mode 100644 index 0000000000000..cbd4cb36bff30 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_erf.comp @@ -0,0 +1,27 @@ +#version 450 + +#include "glu_head.comp" + +// based on Abramowitz and Stegun formula 7.1.26 or similar Hastings' approximation +// ref: https://www.johndcook.com/blog/python_erf/ +const float p_erf = 0.3275911f; +const float a1_erf = 0.254829592f; +const float a2_erf = -0.284496736f; +const float 
a3_erf = 1.421413741f; +const float a4_erf = -1.453152027f; +const float a5_erf = 1.061405429f; + +const float SQRT_2_INV = 0.70710678118654752440084436210484f; + +float op(float a, float b) { + const float a_div_sqr2 = a * SQRT_2_INV; + const float sign_x = sign(a_div_sqr2); + const float x = abs(a_div_sqr2); + const float t = 1.0f / (1.0f + p_erf * x); + const float y = 1.0f - (((((a5_erf * t + a4_erf) * t) + a3_erf) * t + a2_erf) * t + a1_erf) * t * exp(-x * x); + const float erf_approx = sign_x * y; + + return 0.5f * a * (1.0f + erf_approx) * b; +} + +#include "glu_main.comp" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp new file mode 100644 index 0000000000000..3a2a6897bfebb --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/geglu_quick.comp @@ -0,0 +1,11 @@ +#version 450 + +#include "glu_head.comp" + +const float GELU_QUICK_COEF = -1.702f; + +float op(float a, float b) { + return a * (1.0f / (1.0f + exp(GELU_QUICK_COEF * a))) * b; +} + +#include "glu_main.comp" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp b/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp new file mode 100644 index 0000000000000..5fd5a5e703a44 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/gelu_erf.comp @@ -0,0 +1,39 @@ +#version 450 + +#include "generic_head.comp" +#include "types.comp" + +#extension GL_EXT_control_flow_attributes : enable + +layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0) readonly buffer X {A_TYPE data_a[];}; +layout (binding = 1) writeonly buffer D {D_TYPE data_d[];}; + +void main() { + // based on Abramowitz and Stegun formula 7.1.26 or similar Hastings' approximation + // ref: https://www.johndcook.com/blog/python_erf/ + const float p_erf = 0.3275911f; + const float a1_erf = 0.254829592f; + const float a2_erf = -0.284496736f; + const float a3_erf = 1.421413741f; + const float a4_erf = -1.453152027f; + const float a5_erf = 
1.061405429f; + + const float SQRT_2_INV = 0.70710678118654752440084436210484f; + const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; + + if (i >= p.KX) { + return; + } + + const float a = float(data_a[i]); + const float a_div_sqr2 = a * SQRT_2_INV; + const float sign_x = sign(a_div_sqr2); + const float x = abs(a_div_sqr2); + const float t = 1.0f / (1.0f + p_erf * x); + const float y = 1.0f - (((((a5_erf * t + a4_erf) * t) + a3_erf) * t + a2_erf) * t + a1_erf) * t * exp(-x * x); + const float erf_approx = sign_x * y; + + data_d[i] = D_TYPE(0.5f * a * (1.0f + erf_approx)); +} diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp index 062e2a4cdf2d8..4b4316cf3d9f2 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/generic_binary_head.comp @@ -1,6 +1,8 @@ #extension GL_EXT_shader_16bit_storage : require #extension GL_EXT_control_flow_attributes : require +#include "rte.comp" + layout (push_constant) uniform parameter { uint ne; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp new file mode 100644 index 0000000000000..004a61fc16254 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/glu_head.comp @@ -0,0 +1,17 @@ +#extension GL_EXT_shader_16bit_storage : require + +#include "rte.comp" + +layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; + +layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; +layout (binding = 1) readonly buffer B {A_TYPE data_b[];}; +layout (binding = 2) writeonly buffer D {D_TYPE data_d[];}; + +layout (push_constant) uniform parameter +{ + uint N; + uint ne00; + uint ne20; + uint mode; +} p; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp b/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp new file mode 100644 index 0000000000000..85cf65a9ecac8 
--- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/glu_main.comp @@ -0,0 +1,29 @@ +void main() { + const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; + + if (i >= p.N) { + return; + } + + const uint row = i / p.ne20; + const uint col = i - row * p.ne20; + + if (p.mode == 0) { + // Default + const uint offset = p.ne00 / 2; + const uint idx = row * p.ne00 + col; + + data_d[row * offset + col] = D_TYPE(op(float(data_a[idx]), float(data_a[idx + offset]))); + } else if (p.mode == 1) { + // Swapped + const uint offset = p.ne00 / 2; + const uint idx = row * p.ne00 + col; + + data_d[row * offset + col] = D_TYPE(op(float(data_a[idx + offset]), float(data_a[idx]))); + } else { + // Split + const uint idx = row * p.ne00 + col; + + data_d[idx] = D_TYPE(op(float(data_a[idx]), float(data_b[idx]))); + } +} diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp b/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp index 09aa849e8815c..17c7ccb90d001 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/im2col.comp @@ -1,12 +1,9 @@ #version 450 #extension GL_EXT_shader_16bit_storage : require -#extension GL_EXT_spirv_intrinsics: enable #extension GL_EXT_control_flow_attributes : require -#if RTE16 -spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits -#endif +#include "rte.comp" layout (push_constant) uniform parameter { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp index 26163b167c7ed..f481549911b92 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp @@ -18,6 +18,7 @@ #extension GL_KHR_cooperative_matrix : enable #extension GL_KHR_memory_scope_semantics : enable #extension GL_KHR_shader_subgroup_basic : enable +#extension GL_KHR_shader_subgroup_ballot : enable #endif #ifdef MUL_MAT_ID @@ -104,6 +105,10 @@ shared 
FLOAT_TYPE buf_b[BN * SHMEM_STRIDE]; #ifdef MUL_MAT_ID shared u16vec2 row_ids[4096]; +uint _ne1; +#ifdef COOPMAT +shared uint _ne1_sh; +#endif #endif // MUL_MAT_ID #define NUM_WARPS (BLOCK_SIZE / WARP) @@ -172,7 +177,47 @@ void main() { const uint loadstride_b = gl_WorkGroupSize.x * LOAD_VEC_B / BK; #ifdef MUL_MAT_ID - uint _ne1 = 0; +#ifdef COOPMAT + // Spread the search across all elements in the first subgroup + if (gl_SubgroupID == 0) { + _ne1 = 0; + uint num_elements = p.nei1 * p.nei0; + + uint ids[16]; + uint iter = 0; + + for (uint j = 0; j < num_elements; j += gl_SubgroupSize) { + // prefetch up to 16 elements + if (iter == 0) { + [[unroll]] for (uint k = 0; k < 16; ++k) { + uint i = j + gl_SubgroupInvocationID + k*gl_SubgroupSize; + bool in_range = i < num_elements; + uint ii1 = i / p.nei0; + uint ii0 = i % p.nei0; + ids[k] = in_range ? data_ids[ii1*p.nbi1 + ii0] : 0; + } + } + uint i = j + gl_SubgroupInvocationID; + bool in_range = i < num_elements; + uint ii1 = i / p.nei0; + uint ii0 = i % p.nei0; + uint id = ids[iter++]; + uvec4 ballot = subgroupBallot(in_range && id == expert_idx); + uint idx = subgroupBallotExclusiveBitCount(ballot); + if (in_range && id == expert_idx) { + row_ids[_ne1 + idx] = u16vec2(ii0, ii1); + } + _ne1 += subgroupBallotBitCount(ballot); + iter &= 15; + } + _ne1_sh = _ne1; + } + + barrier(); + + _ne1 = _ne1_sh; +#else + _ne1 = 0; for (uint ii1 = 0; ii1 < p.nei1; ii1++) { for (uint ii0 = 0; ii0 < p.nei0; ii0++) { if (data_ids[ii1*p.nbi1 + ii0] == expert_idx) { @@ -183,6 +228,7 @@ void main() { } barrier(); +#endif // Workgroup has no work if (ic * BN >= _ne1) return; @@ -500,10 +546,9 @@ void main() { const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; - const uint ib = idx / 128; // 2 values per idx - const uint ib32 = (idx % 128) / 16; // 0..7 - const uint ib8 = (idx % 128) / 4; - const int i8 = 2 * int(idx % 4); + const uint ib = 
idx / 32; // 8 values per idx + const uint ib32 = (idx % 32) / 4; // 0..7 + const uint ib8 = idx % 32; const float d = float(data_a[ib].d); const uint qh = data_a[ib].qh[ib32]; @@ -512,22 +557,16 @@ void main() { const float delta = ((qh & 0x8000) != 0) ? -IQ1S_DELTA : IQ1S_DELTA; const int16_t grid = int16_t(iq1s_grid[qs | (bitfieldExtract(qh, 3 * int(ib8 & 3), 3) << 8)]); - const ivec2 gvec = ivec2( - bitfieldExtract(grid, 2 * (i8), 2), - bitfieldExtract(grid, 2 * (i8 + 1), 2) - ); - const vec2 v = dl * (vec2(gvec) + delta); - - buf_a[buf_idx ] = FLOAT_TYPE(v.x); - buf_a[buf_idx + 1] = FLOAT_TYPE(v.y); + [[unroll]] for (int k = 0; k < 8; ++k) { + buf_a[buf_idx + k] = FLOAT_TYPE(dl * (bitfieldExtract(grid, 2 * k, 2) + delta)); + } #elif defined(DATA_A_IQ1_M) const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; - const uint ib = idx / 128; // 2 values per idx - const uint ib8 = (idx % 128) / 4; + const uint ib = idx / 32; // 8 values per idx + const uint ib8 = idx % 32; const uint ib16 = ib8 / 2; - const int i8 = 2 * int(idx % 4); const uint16_t[4] scales = data_a[ib].scales; const u16vec4 s = u16vec4(scales[0], scales[1], scales[2], scales[3]) >> 12; @@ -538,21 +577,17 @@ void main() { const float dl = d * (2 * bitfieldExtract(sc, 3 * int(ib16 & 3), 3) + 1); const float delta = ((qh & 8) != 0) ? 
-IQ1M_DELTA : IQ1M_DELTA; const int16_t grid = int16_t(iq1s_grid[qs | ((qh & 7) << 8)]); - const ivec2 gvec = ivec2( - bitfieldExtract(grid, 2 * (i8), 2), - bitfieldExtract(grid, 2 * (i8 + 1), 2) - ); - const vec2 v = dl * (vec2(gvec) + delta); - buf_a[buf_idx ] = FLOAT_TYPE(v.x); - buf_a[buf_idx + 1] = FLOAT_TYPE(v.y); + [[unroll]] for (int k = 0; k < 8; ++k) { + buf_a[buf_idx + k] = FLOAT_TYPE(dl * (bitfieldExtract(grid, 2 * k, 2) + delta)); + } #elif defined(DATA_A_IQ2_XXS) const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; - const uint ib = idx / 128; // 2 values per idx - const uint ib32 = (idx % 128) / 16; // 0..7 - const uint ib8 = (idx / 4) % 4; + const uint ib = idx / 32; // 8 values per idx + const uint ib32 = (idx % 32) / 4; // 0..7 + const uint ib8 = idx % 4; const float d = float(data_a[ib].d); const uint qs = data_a[ib].qs[8 * ib32 + ib8]; @@ -562,63 +597,81 @@ void main() { data_a[ib].qs[8*ib32 + 6], data_a[ib].qs[8*ib32 + 7] )); - const float db = d * 0.25 * (0.5 + (signs >> 28)); + const FLOAT_TYPE db = FLOAT_TYPE(d * 0.25 * (0.5 + (signs >> 28))); const uint32_t sign7 = bitfieldExtract(signs, 7 * int(ib8), 7); - const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (2 * (idx % 4)); - const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(int8_t(sign << 1), int8_t(sign)))); - const uint grid = iq2xxs_grid[qs][(idx % 4) / 2] >> (16 * (idx & 1)); - const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy); // vec4 used due to #12147 - - buf_a[buf_idx ] = FLOAT_TYPE(v.x); - buf_a[buf_idx + 1] = FLOAT_TYPE(v.y); + const uint sign = sign7 | (bitCount(sign7) << 7); + const uvec2 grid = iq2xxs_grid[qs]; + const vec4 grid0 = vec4(unpack8(grid.x)); + const vec4 grid1 = vec4(unpack8(grid.y)); + + buf_a[buf_idx ] = db * FLOAT_TYPE((sign & 1) != 0 ? -grid0.x : grid0.x); + buf_a[buf_idx + 1] = db * FLOAT_TYPE((sign & 2) != 0 ? 
-grid0.y : grid0.y); + buf_a[buf_idx + 2] = db * FLOAT_TYPE((sign & 4) != 0 ? -grid0.z : grid0.z); + buf_a[buf_idx + 3] = db * FLOAT_TYPE((sign & 8) != 0 ? -grid0.w : grid0.w); + buf_a[buf_idx + 4] = db * FLOAT_TYPE((sign & 16) != 0 ? -grid1.x : grid1.x); + buf_a[buf_idx + 5] = db * FLOAT_TYPE((sign & 32) != 0 ? -grid1.y : grid1.y); + buf_a[buf_idx + 6] = db * FLOAT_TYPE((sign & 64) != 0 ? -grid1.z : grid1.z); + buf_a[buf_idx + 7] = db * FLOAT_TYPE((sign & 128) != 0 ? -grid1.w : grid1.w); #elif defined(DATA_A_IQ2_XS) const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; - const uint ib = idx / 128; // 2 values per idx - const uint ib32 = (idx % 128) / 16; // 0..7 - const uint ib8 = (idx / 4) % 4; // 0..3 + const uint ib = idx / 32; // 8 values per idx + const uint ib32 = (idx % 32) / 4; // 0..7 + const uint ib8 = idx % 4; // 0..3 const float d = float(data_a[ib].d); const uint scale = (data_a[ib].scales[ib32] >> (2 * (ib8 & 2))) & 0xf; - const float db = d * 0.25 * (0.5 + scale); + const FLOAT_TYPE db = FLOAT_TYPE(d * 0.25 * (0.5 + scale)); const uint qs = data_a[ib].qs[4 * ib32 + ib8]; const uint sign7 = qs >> 9; - const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (2 * (idx % 4)); - const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(int8_t(sign << 1), int8_t(sign)))); - const uint grid = iq2xs_grid[qs & 511][(idx % 4) / 2] >> (16 * (idx & 1)); - const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy); // vec4 used due to #12147 - - buf_a[buf_idx ] = FLOAT_TYPE(v.x); - buf_a[buf_idx + 1] = FLOAT_TYPE(v.y); + const uint sign = sign7 | (bitCount(sign7) << 7); + const uvec2 grid = iq2xs_grid[qs & 511]; + const vec4 grid0 = vec4(unpack8(grid.x)); + const vec4 grid1 = vec4(unpack8(grid.y)); + + buf_a[buf_idx ] = db * FLOAT_TYPE((sign & 1) != 0 ? -grid0.x : grid0.x); + buf_a[buf_idx + 1] = db * FLOAT_TYPE((sign & 2) != 0 ? 
-grid0.y : grid0.y); + buf_a[buf_idx + 2] = db * FLOAT_TYPE((sign & 4) != 0 ? -grid0.z : grid0.z); + buf_a[buf_idx + 3] = db * FLOAT_TYPE((sign & 8) != 0 ? -grid0.w : grid0.w); + buf_a[buf_idx + 4] = db * FLOAT_TYPE((sign & 16) != 0 ? -grid1.x : grid1.x); + buf_a[buf_idx + 5] = db * FLOAT_TYPE((sign & 32) != 0 ? -grid1.y : grid1.y); + buf_a[buf_idx + 6] = db * FLOAT_TYPE((sign & 64) != 0 ? -grid1.z : grid1.z); + buf_a[buf_idx + 7] = db * FLOAT_TYPE((sign & 128) != 0 ? -grid1.w : grid1.w); #elif defined(DATA_A_IQ2_S) const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; - const uint ib = idx / 128; // 2 values per idx - const uint ib8 = (idx % 128) / 4; // 0..31 - const uint ib32 = ib8 / 4; // 0..7 + const uint ib = idx / 32; // 8 values per idx + const uint ib8 = idx % 32; // 0..31 + const uint ib32 = ib8 / 4; // 0..7 const uint scale = (data_a[ib].scales[ib32] >> (2 * (ib8 & 2))) & 0xf; const uint qs = data_a[ib].qs[ib8]; const uint qh = data_a[ib].qh[ib32]; const uint qhshift = 2 * (ib8 % 4); - const uint sign = data_a[ib].qs[QUANT_K / 8 + ib8] >> (2 * (idx % 4)); + const uint sign = data_a[ib].qs[QUANT_K / 8 + ib8]; const float d = float(data_a[ib].d); - const float db = d * 0.25 * (0.5 + scale); - const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(int8_t(sign << 1), int8_t(sign)))); - const uint16_t grid = unpack16(iq2s_grid[qs | ((qh << (8 - qhshift)) & 0x300)][(idx & 2) >> 1])[idx & 1]; - const vec2 v = db * vec2(sign01) * vec2(unpack8(uint32_t(grid)).xy); // vec4 used due to #12147 - - buf_a[buf_idx ] = FLOAT_TYPE(v.x); - buf_a[buf_idx + 1] = FLOAT_TYPE(v.y); + const FLOAT_TYPE db = FLOAT_TYPE(d * 0.25 * (0.5 + scale)); + const uvec2 grid = iq2s_grid[qs | ((qh << (8 - qhshift)) & 0x300)]; + const vec4 grid0 = vec4(unpack8(grid.x)); + const vec4 grid1 = vec4(unpack8(grid.y)); + + buf_a[buf_idx ] = db * FLOAT_TYPE((sign & 1) != 0 ? 
-grid0.x : grid0.x); + buf_a[buf_idx + 1] = db * FLOAT_TYPE((sign & 2) != 0 ? -grid0.y : grid0.y); + buf_a[buf_idx + 2] = db * FLOAT_TYPE((sign & 4) != 0 ? -grid0.z : grid0.z); + buf_a[buf_idx + 3] = db * FLOAT_TYPE((sign & 8) != 0 ? -grid0.w : grid0.w); + buf_a[buf_idx + 4] = db * FLOAT_TYPE((sign & 16) != 0 ? -grid1.x : grid1.x); + buf_a[buf_idx + 5] = db * FLOAT_TYPE((sign & 32) != 0 ? -grid1.y : grid1.y); + buf_a[buf_idx + 6] = db * FLOAT_TYPE((sign & 64) != 0 ? -grid1.z : grid1.z); + buf_a[buf_idx + 7] = db * FLOAT_TYPE((sign & 128) != 0 ? -grid1.w : grid1.w); #elif defined(DATA_A_IQ3_XXS) const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; - const uint ib = idx / 128; // 2 values per idx - const uint iqs = (idx % 128) / 2; // 0..63 + const uint ib = idx / 64; // 4 values per idx + const uint iqs = idx % 64; // 0..63 const uint is = QUANT_K / 4 + 4 * (iqs / 8); // 8 values const float d = float(data_a[ib].d); @@ -631,33 +684,36 @@ void main() { )); const float db = d * 0.5 * (0.5 + (signs >> 28)); const uint32_t sign7 = bitfieldExtract(signs, 7 * (int(iqs / 2) % 4), 7); - const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (2 * (idx % 4)); - const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(int8_t(sign << 1), int8_t(sign)))); - const uint grid = iq3xxs_grid[qs] >> (16 * (idx & 1)); - const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy); // vec4 used due to #12147 - - buf_a[buf_idx ] = FLOAT_TYPE(v.x); - buf_a[buf_idx + 1] = FLOAT_TYPE(v.y); + const uint sign = (sign7 | (bitCount(sign7) << 7)) >> (4 * (idx % 2)); + const uint grid = iq3xxs_grid[qs]; + const vec4 v = db * vec4(unpack8(grid)); + + buf_a[buf_idx ] = FLOAT_TYPE((sign & 1) != 0 ? -v.x : v.x); + buf_a[buf_idx + 1] = FLOAT_TYPE((sign & 2) != 0 ? -v.y : v.y); + buf_a[buf_idx + 2] = FLOAT_TYPE((sign & 4) != 0 ? -v.z : v.z); + buf_a[buf_idx + 3] = FLOAT_TYPE((sign & 8) != 0 ? 
-v.w : v.w); #elif defined(DATA_A_IQ3_S) const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; - const uint ib = idx / 128; // 2 values per idx - const uint iqs = (idx % 128) / 2; // 0..63 + const uint ib = idx / 64; // 4 values per idx + const uint iqs = idx % 64; // 0..63 const uint iqh = iqs / 8; const float d = float(data_a[ib].d); const uint qs = data_a[ib].qs[iqs]; const uint qh = data_a[ib].qh[iqh]; - const int8_t sign = int8_t(data_a[ib].signs[iqs / 2] >> (2 * (idx % 4))); + const int8_t sign = int8_t(data_a[ib].signs[iqs / 2] >> (4 * (idx % 2))); const uint scale = data_a[ib].scales[iqs / 16]; const i8vec2 sign01 = i8vec2(1 - (2 & i8vec2(sign << 1, sign))); const float db = d * (1 + 2 * ((scale >> (4 * (iqh & 1))) & 0xf)); - const uint32_t grid = iq3s_grid[qs | ((qh << (8 - (iqs % 8))) & 256)] >> (16 * (idx % 2)); - const vec2 v = db * vec2(sign01) * vec2(unpack8(grid).xy); // vec4 used due to #12147 + const uint32_t grid = iq3s_grid[qs | ((qh << (8 - (iqs % 8))) & 256)]; + const vec4 v = db * vec4(unpack8(grid)); - buf_a[buf_idx ] = FLOAT_TYPE(v.x); - buf_a[buf_idx + 1] = FLOAT_TYPE(v.y); + buf_a[buf_idx ] = FLOAT_TYPE((sign & 1) != 0 ? -v.x : v.x); + buf_a[buf_idx + 1] = FLOAT_TYPE((sign & 2) != 0 ? -v.y : v.y); + buf_a[buf_idx + 2] = FLOAT_TYPE((sign & 4) != 0 ? -v.z : v.z); + buf_a[buf_idx + 3] = FLOAT_TYPE((sign & 8) != 0 ? 
-v.w : v.w); #elif defined(DATA_A_IQ4_XS) const uint idx = pos_a + (loadc_a + l) * p.stride_a / LOAD_VEC_A + loadr_a; const uint buf_idx = (loadc_a + l) * SHMEM_STRIDE + loadr_a * LOAD_VEC_A; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp index 9184657573281..29e4b5c9ce2d4 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/mul_mm_cm2.comp @@ -162,17 +162,32 @@ void main() { _ne1 = 0; uint num_elements = p.nei1 * p.nei0; - for (uint i = gl_SubgroupInvocationID; subgroupAny(i < num_elements); i += gl_SubgroupSize) { + uint ids[16]; + uint iter = 0; + + for (uint j = 0; j < num_elements; j += gl_SubgroupSize) { + // prefetch up to 16 elements + if (iter == 0) { + [[unroll]] for (uint k = 0; k < 16; ++k) { + uint i = j + gl_SubgroupInvocationID + k*gl_SubgroupSize; + bool in_range = i < num_elements; + uint ii1 = i / p.nei0; + uint ii0 = i % p.nei0; + ids[k] = in_range ? data_ids[ii1*p.nbi1 + ii0] : 0; + } + } + uint i = j + gl_SubgroupInvocationID; bool in_range = i < num_elements; - uint ii0 = i % p.nei0; uint ii1 = i / p.nei0; - uint id = in_range ? 
data_ids[ii1*p.nbi1 + ii0] : 0; + uint ii0 = i % p.nei0; + uint id = ids[iter++]; uvec4 ballot = subgroupBallot(in_range && id == expert_idx); uint idx = subgroupBallotExclusiveBitCount(ballot); if (in_range && id == expert_idx) { row_ids[_ne1 + idx] = u16vec4(ii0 % p.ne11, ii1, ii0, 0); } _ne1 += subgroupBallotBitCount(ballot); + iter &= 15; } _ne1_sh = _ne1; } @@ -414,17 +429,31 @@ void main() { fetch_scales(ir * BM, pos_a, stride_a, block_k + BK, tid, false); } - coopmat mat_a; - coopmat mat_b; + if ((ir + 1) * BM <= p.M && block_k + BK <= end_k) { + coopmat mat_a; + coopmat mat_b; - coopMatLoadTensorNV(mat_a, data_a, pos_a, sliceTensorLayoutNV(tensorLayoutAClamp, ir * BM, BM, block_k, BK) DECODEFUNCA); + coopMatLoadTensorNV(mat_a, data_a, pos_a, sliceTensorLayoutNV(tensorLayoutA, ir * BM, BM, block_k, BK) DECODEFUNCA); #ifdef MUL_MAT_ID - coopMatLoadTensorNV(mat_b, data_b, pos_b, sliceTensorLayoutNV(tensorLayoutB, ic * BN, BN, block_k, BK), tensorViewTranspose, decodeFuncB); + coopMatLoadTensorNV(mat_b, data_b, pos_b, sliceTensorLayoutNV(tensorLayoutB, ic * BN, BN, block_k, BK), tensorViewTranspose, decodeFuncB); #else - coopMatLoadTensorNV(mat_b, data_b, pos_b, sliceTensorLayoutNV(tensorLayoutBClamp, ic * BN, BN, block_k, BK), tensorViewTranspose); + coopMatLoadTensorNV(mat_b, data_b, pos_b, sliceTensorLayoutNV(tensorLayoutBClamp, ic * BN, BN, block_k, BK), tensorViewTranspose); #endif - sum = coopMatMulAdd(mat_a, mat_b, sum); + sum = coopMatMulAdd(mat_a, mat_b, sum); + } else { + coopmat mat_a; + coopmat mat_b; + + coopMatLoadTensorNV(mat_a, data_a, pos_a, sliceTensorLayoutNV(tensorLayoutAClamp, ir * BM, BM, block_k, BK) DECODEFUNCA); +#ifdef MUL_MAT_ID + coopMatLoadTensorNV(mat_b, data_b, pos_b, sliceTensorLayoutNV(tensorLayoutB, ic * BN, BN, block_k, BK), tensorViewTranspose, decodeFuncB); +#else + coopMatLoadTensorNV(mat_b, data_b, pos_b, sliceTensorLayoutNV(tensorLayoutBClamp, ic * BN, BN, block_k, BK), tensorViewTranspose); +#endif + + sum = 
coopMatMulAdd(mat_a, mat_b, sum); + } } // Convert from ACC_TYPE to D_TYPE diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp new file mode 100644 index 0000000000000..0073d8f766610 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/reglu.comp @@ -0,0 +1,9 @@ +#version 450 + +#include "glu_head.comp" + +float op(float a, float b) { + return max(a, 0.0f) * b; +} + +#include "glu_main.comp" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp index deb8ee9960f58..6428ca7ba3300 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rms_norm.comp @@ -1,11 +1,13 @@ #version 450 -#include "generic_unary_head.comp" +#include "generic_binary_head.comp" #include "types.comp" #extension GL_EXT_control_flow_attributes : enable #define BLOCK_SIZE 512 +layout (constant_id = 1) const bool do_multiply = false; + layout(local_size_x = BLOCK_SIZE, local_size_y = 1, local_size_z = 1) in; shared FLOAT_TYPE sum[BLOCK_SIZE]; @@ -25,6 +27,7 @@ void main() { const uint stride_sample = p.nb03; uint32_t a_offset = samp*stride_sample + channel*stride_channel + row*stride_row + get_aoffset(); + uint32_t b_offset = src1_idx(0, row, channel, samp) + get_boffset(); uint32_t d_offset = ((samp*nchannels + channel)*nrows + row)*ncols + get_doffset(); sum[tid] = FLOAT_TYPE(0.0f); // partial sum for thread in warp @@ -46,7 +49,13 @@ void main() { const FLOAT_TYPE mean = sum[0] / FLOAT_TYPE(ncols); const FLOAT_TYPE scale = inversesqrt(mean + FLOAT_TYPE(p.param1)); - [[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) { - data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col])); + if (do_multiply) { + [[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) { + data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col]) * FLOAT_TYPE(data_b[b_offset + col])); + } + } else { + 
[[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) { + data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col])); + } } } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp b/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp new file mode 100644 index 0000000000000..b9abe8dedcf86 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/roll.comp @@ -0,0 +1,46 @@ +#version 450 + +#include "types.comp" +#include "generic_unary_head.comp" + +layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; + +uint wrap_idx(int i, uint ne) { + if (i < 0) { + return i + ne; + } else if (i >= ne) { + return i - ne; + } + return i; +} + +void main() { + const uint idx = get_idx(); + if (idx >= p.ne) { + return; + } + + const uint i3 = fastdiv(idx, p.ne1_012mp, p.ne1_012L); + const uint i3_offset = i3 * p.ne12*p.ne11*p.ne10; + const uint i2 = fastdiv(idx - i3_offset, p.ne1_01mp, p.ne1_01L); + const uint i2_offset = i2*p.ne11*p.ne10; + const uint i1 = fastdiv(idx - i3_offset - i2_offset, p.ne1_0mp, p.ne1_0L); + const uint i0 = idx - i3_offset - i2_offset - i1*p.ne10; + + const uint p1 = floatBitsToUint(p.param1); + const uint p2 = floatBitsToUint(p.param2); + const int s0 = int(p1 >> 16) - 0x8000; + const int s1 = int(p1 & 0xFFFF) - 0x8000; + const int s2 = int(p2 >> 16) - 0x8000; + const int s3 = int(p2 & 0xFFFF) - 0x8000; + + const uint i00 = wrap_idx(int(i0) - s0, p.ne10); + const uint i01 = wrap_idx(int(i1) - s1, p.ne11); + const uint i02 = wrap_idx(int(i2) - s2, p.ne12); + const uint i03 = wrap_idx(int(i3) - s3, p.ne13); + + const uint a_idx = i03*p.nb03 + i02*p.nb02 + i01*p.nb01 + i00*p.nb00; + const uint d_idx = i3 *p.nb13 + i2 *p.nb12 + i1 *p.nb11 + i0 *p.nb10; + + data_d[get_doffset() + d_idx] = D_TYPE(data_a[get_aoffset() + a_idx]); +} diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp index 96c9c4cbd307c..00e203e73bd1b 100644 --- 
a/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_head.comp @@ -1,11 +1,8 @@ #include "types.comp" #extension GL_EXT_shader_16bit_storage : require -#extension GL_EXT_spirv_intrinsics: enable -#if RTE16 -spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits -#endif +#include "rte.comp" layout(local_size_x = 1, local_size_y = 256, local_size_z = 1) in; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp index 4f5b1a0ecaf5d..5808710ccf998 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_multi.comp @@ -14,21 +14,19 @@ void main() { const uint row_dst = gl_GlobalInvocationID.x; - if (i0 >= p.n_dims) { - const uint i = row_dst*ne0 + i0; - - data_d[i + 0] = data_a[i + 0]; - data_d[i + 1] = data_a[i + 1]; - - return; - } - const uint row_x = row_dst % ne1; const uint channel_x = row_dst / ne1; const uint idst = row_dst*ne0 + i0/2; const uint ix = channel_x*p.s2 + row_x*p.s1 + i0/2; + if (i0 >= p.n_dims) { + data_d[idst + i0/2 + 0] = data_a[ix + i0/2 + 0]; + data_d[idst + i0/2 + 1] = data_a[ix + i0/2 + 1]; + + return; + } + const int sect_dims = p.sections[0] + p.sections[1] + p.sections[2] + p.sections[3]; const int sec_w = p.sections[1] + p.sections[0]; const uint sector = (i0 / 2) % sect_dims; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp index db775c456cae8..366a7b1c47cdd 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_neox.comp @@ -13,21 +13,19 @@ void main() { const uint row_dst = gl_GlobalInvocationID.x; - if (i0 >= p.n_dims) { - const uint i = row_dst*ne0 + i0; - - data_d[i + 0] = data_a[i + 0]; - data_d[i + 1] = data_a[i + 1]; - - return; - } - const uint row_x = row_dst % ne1; const uint channel_x = row_dst / ne1; const uint 
idst = row_dst*ne0 + i0/2; const uint ix = channel_x*p.s2 + row_x*p.s1 + i0/2; + if (i0 >= p.n_dims) { + data_d[idst + i0/2 + 0] = data_a[ix + i0/2 + 0]; + data_d[idst + i0/2 + 1] = data_a[ix + i0/2 + 1]; + + return; + } + const float theta_base = data_pos[channel_x] * pow(p.theta_scale, i0/2.0f); const float freq_factor = p.has_ff != 0 ? data_ff[i0/2] : 1.0f; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp index 4ad35e549d77f..9643bca96ac92 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rope_norm.comp @@ -13,21 +13,19 @@ void main() { const uint row_dst = gl_GlobalInvocationID.x; - if (i0 >= p.n_dims) { - const uint i = row_dst*ne0 + i0; - - data_d[i + 0] = data_a[i + 0]; - data_d[i + 1] = data_a[i + 1]; - - return; - } - const uint row_x = row_dst % ne1; const uint channel_x = row_dst / ne1; const uint idst = row_dst*ne0 + i0; const uint ix = channel_x*p.s2 + row_x*p.s1 + i0; + if (i0 >= p.n_dims) { + data_d[idst + 0] = data_a[ix + 0]; + data_d[idst + 1] = data_a[ix + 1]; + + return; + } + const float theta_base = data_pos[channel_x] * pow(p.theta_scale, i0/2.0f); const float freq_factor = p.has_ff != 0 ? 
data_ff[i0/2] : 1.0f; diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp b/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp new file mode 100644 index 0000000000000..ad51c1e80b856 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/rte.comp @@ -0,0 +1,5 @@ + +#if RTE16 +#extension GL_EXT_spirv_intrinsics : enable +spirv_execution_mode(capabilities = [4467], 4462, 16); // RoundingModeRTE, 16 bits +#endif // RTE16 diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp b/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp index 4663428dee0a2..f10b0a02b5076 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/scale.comp @@ -18,7 +18,7 @@ void main() { continue; } - data_d[get_doffset() + idx] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + idx]) * FLOAT_TYPE(p.param1)); + data_d[get_doffset() + idx] = D_TYPE(FLOAT_TYPE(data_a[get_aoffset() + idx]) * FLOAT_TYPE(p.param1) + FLOAT_TYPE(p.param2)); idx += num_threads; } } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp b/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp index 51fc2dc7ed406..5bcd3b1e3ddc6 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/soft_max.comp @@ -6,6 +6,14 @@ layout (push_constant) uniform parameter { uint KX; uint KY; + uint ne00; + uint ne01; + uint ne02; + uint ne12; + uint ne13; + uint nb11; + uint nb12; + uint nb13; float scale; float max_bias; float m0; @@ -31,7 +39,15 @@ shared FLOAT_TYPE vals[BLOCK_SIZE]; void soft_max(uint num_iters) { const uint tid = gl_LocalInvocationID.x; const uint rowx = gl_WorkGroupID.z * 262144 + gl_WorkGroupID.y * 512 + gl_WorkGroupID.x; - const uint rowy = (p.KY > 0) ? 
(rowx % p.KY) : 0; + + const uint32_t i03 = rowx / (p.ne01 * p.ne02); + const uint32_t i02 = (rowx - i03 * p.ne01 * p.ne02) / p.ne01; + const uint32_t i01 = rowx % p.ne01; + + uint rowy_start = 0; + if (p.KY > 0) { + rowy_start = i01 * p.nb11 + (i02 % p.ne12) * p.nb12 + (i03 % p.ne13) * p.nb13; + } if (rowx >= p.nrows_x) { return; @@ -41,7 +57,7 @@ void soft_max(uint num_iters) { // ALiBi if (p.max_bias > 0.0f) { - const uint h = rowx/p.KY; // head index + const uint h = (rowx / p.ne01) % p.ne02; // head index const float base = h < p.n_head_log2 ? p.m0 : p.m1; const uint exp = h < p.n_head_log2 ? h + 1 : 2*(h - p.n_head_log2) + 1; @@ -67,7 +83,7 @@ void soft_max(uint num_iters) { FLOAT_TYPE b = FLOAT_TYPE(0); if (p.KY > 0 && col < p.KX) { - b = data_b[rowy * p.KX + col]; + b = data_b[rowy_start + col]; } FLOAT_TYPE v = a * p.scale + slope * b; @@ -111,7 +127,7 @@ void soft_max(uint num_iters) { if (idx < DATA_CACHE_SIZE) { val = exp(data_cache[idx] - max_val); } else { - val = exp(FLOAT_TYPE(data_a[i]) * p.scale + (p.KY > 0 ? slope * FLOAT_TYPE(data_b[rowy * p.KX + col]) : FLOAT_TYPE(0.0f)) - max_val); + val = exp(FLOAT_TYPE(data_a[i]) * p.scale + (p.KY > 0 ? 
slope * FLOAT_TYPE(data_b[rowy_start + col]) : FLOAT_TYPE(0.0f)) - max_val); } sum += val; if (idx < DATA_CACHE_SIZE) { diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp b/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp new file mode 100644 index 0000000000000..a28e7c6cc8660 --- /dev/null +++ b/ggml/src/ggml-vulkan/vulkan-shaders/swiglu.comp @@ -0,0 +1,9 @@ +#version 450 + +#include "glu_head.comp" + +float op(float a, float b) { + return a / (1.0f + exp(-a)) * b; +} + +#include "glu_main.comp" diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp index 6f607380df8bf..74771def0f98e 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/upscale.comp @@ -3,6 +3,7 @@ layout (push_constant) uniform parameter { uint ne; uint a_offset; uint d_offset; + uint ne00; uint ne01; uint nb00; uint nb01; uint nb02; uint nb03; uint ne10; uint ne11; uint ne12; uint ne13; float sf0; float sf1; float sf2; float sf3; @@ -15,6 +16,61 @@ layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in; layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; layout (binding = 1) writeonly buffer D {D_TYPE data_d[];}; +// from ggml.h: enum ggml_scale_mode, enum ggml_scale_flag +#define NEAREST 0 +#define BILINEAR 1 +#define ALIGN_CORNERS (1 << 8) + +layout (constant_id = 0) const uint scale_mode = 0; + +float fetch_nearest(uint i10, uint i11, uint i12, uint i13) { + const uint i00 = uint(i10 / p.sf0); + const uint i01 = uint(i11 / p.sf1); + const uint i02 = uint(i12 / p.sf2); + const uint i03 = uint(i13 / p.sf3); + + return data_a[p.a_offset + i03 * p.nb03 + i02 * p.nb02 + i01 * p.nb01 + i00 * p.nb00]; +} + +float fetch_bilinear(ivec2 c0, ivec2 c1, vec2 d, uint i12, uint i13) { + const uint i02 = uint(i12 / p.sf2); + const uint i03 = uint(i13 / p.sf3); + const uint base = p.a_offset + i03 * p.nb03 + i02 * p.nb02; + + const float v00 = data_a[base + c0.y * p.nb01 + 
c0.x * p.nb00]; + const float v01 = data_a[base + c0.y * p.nb01 + c1.x * p.nb00]; + const float v10 = data_a[base + c1.y * p.nb01 + c0.x * p.nb00]; + const float v11 = data_a[base + c1.y * p.nb01 + c1.x * p.nb00]; + + return + v00 * (1.0-d.x) * (1.0-d.y) + + v01 * d.x * (1.0-d.y) + + v10 * (1.0-d.x) * d.y + + v11 * d.x * d.y; +} + +float interpolate_bilinear(uint i10, uint i11, uint i12, uint i13) { + const ivec2 ne0 = ivec2(p.ne00, p.ne01); + + const vec2 c = (vec2(i10, i11) + 0.5) / vec2(p.sf0, p.sf1) - 0.5; + const vec2 c0f = floor(c); + const vec2 d = c - c0f; + const ivec2 c0 = max(ivec2(c0f), 0); + const ivec2 c1 = min(ivec2(c0f + 1), ne0 - 1); + + return fetch_bilinear(c0, c1, d, i12, i13); +} + +float interpolate_bilinear_align_corners(uint i10, uint i11, uint i12, uint i13) { + const vec2 c = vec2(i10, i11) / vec2(p.sf0, p.sf1); + const vec2 c0f = floor(c); + const vec2 d = c - c0f; + const ivec2 c0 = ivec2(c0f); + const ivec2 c1 = c0 + 1; + + return fetch_bilinear(c0, c1, d, i12, i13); +} + void main() { const uint idx = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x; @@ -27,10 +83,18 @@ void main() { const uint i12 = (idx / (p.ne10 * p.ne11)) % p.ne12; const uint i13 = (idx / (p.ne10 * p.ne11 * p.ne12)) % p.ne13; - const uint i00 = uint(i10 / p.sf0); - const uint i01 = uint(i11 / p.sf1); - const uint i02 = uint(i12 / p.sf2); - const uint i03 = uint(i13 / p.sf3); + float result; + switch (scale_mode) { + case NEAREST: + result = fetch_nearest(i10, i11, i12, i13); + break; + case BILINEAR: + result = interpolate_bilinear(i10, i11, i12, i13); + break; + case BILINEAR | ALIGN_CORNERS: + result = interpolate_bilinear_align_corners(i10, i11, i12, i13); + break; + } - data_d[p.d_offset + idx] = D_TYPE(data_a[p.a_offset + i03 * p.nb03 + i02 * p.nb02 + i01 * p.nb01 + i00 * p.nb00]); + data_d[p.d_offset + idx] = D_TYPE(result); } diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp 
b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp index c63345ec8b4b6..809c0bd9bd305 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp @@ -360,9 +360,9 @@ void matmul_shaders(bool fp16, bool matmul_id, bool coopmat, bool coopmat2, bool for (const auto& tname : type_names) { std::string load_vec_quant = "2"; - if ((tname == "q4_0") || (tname == "q4_1")) + if ((tname == "q4_0") || (tname == "q4_1") || (tname == "iq1_s") || (tname == "iq1_m") || (tname == "iq2_xxs") || (tname == "iq2_xs") || (tname == "iq2_s")) load_vec_quant = "8"; - else if ((tname == "q5_0") || (tname == "q5_1") || (tname == "q8_0") || (tname == "iq4_nl")) + else if ((tname == "q5_0") || (tname == "q5_1") || (tname == "q8_0") || (tname == "iq3_xxs") || (tname == "iq3_s") || (tname == "iq4_nl")) load_vec_quant = "4"; if (tname == "bf16") { @@ -497,7 +497,7 @@ void process_shaders() { // Norms string_to_spv("norm_f32", "norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}})); string_to_spv("group_norm_f32", "group_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}})); - string_to_spv("rms_norm_f32", "rms_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}})); + string_to_spv("rms_norm_f32", "rms_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}})); string_to_spv("rms_norm_back_f32", "rms_norm_back.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}})); string_to_spv("l2_norm_f32", "l2_norm.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}})); @@ -518,6 +518,11 @@ void process_shaders() { string_to_spv("cpy_" + t + "_f32", "copy_from_quant.comp", {{"DATA_A_" + to_uppercase(t), "1"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}}); } + for (std::string t : {"f32", "f16", "bf16", "q4_0", "q4_1", "q5_0", "q5_1", "q8_0", "iq4_nl"}) { 
+ string_to_spv("set_rows_" + t, "copy_to_quant.comp", {{"SET_ROWS", "1"}, {"DATA_A_" + to_uppercase(t), "1"}, {"B_TYPE", "uvec2"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}}); + string_to_spv("set_rows_" + t + "_rte", "copy_to_quant.comp", {{"SET_ROWS", "1"}, {"DATA_A_" + to_uppercase(t), "1"}, {"B_TYPE", "uvec2"}, {"D_TYPE", "float"}, {"FLOAT_TYPE", "float"}, {"RTE16", "1"}}); + } + auto get_type_str = [](bool f16) { return f16 ? "float16_t" : "float"; }; @@ -532,8 +537,10 @@ void process_shaders() { for (auto src0_f16 : {false, true}) { for (auto src1_f16 : {false, true}) { for (auto dst_f16 : {false, true}) { - auto name = op + get_suffix(src0_f16, src1_f16, dst_f16); - string_to_spv(name.c_str(), op + ".comp", {{"A_TYPE", get_type_str(src0_f16)}, {"B_TYPE", get_type_str(src1_f16)}, {"D_TYPE", get_type_str(dst_f16)}, {"FLOAT_TYPE", "float"}}); + for (auto rte : {false, true}) { + auto name = op + get_suffix(src0_f16, src1_f16, dst_f16) + (rte ? "_rte" : ""); + string_to_spv(name.c_str(), op + ".comp", {{"A_TYPE", get_type_str(src0_f16)}, {"B_TYPE", get_type_str(src1_f16)}, {"D_TYPE", get_type_str(dst_f16)}, {"FLOAT_TYPE", "float"}, {"RTE16", rte ? 
"1" : "0"}}); + } } } } @@ -574,6 +581,8 @@ void process_shaders() { string_to_spv("gelu_f16", "gelu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}}); string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}}); + string_to_spv("gelu_erf_f16", "gelu_erf.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}}); + string_to_spv("gelu_erf_f32", "gelu_erf.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}}); string_to_spv("gelu_quick_f16", "gelu_quick.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}}); string_to_spv("gelu_quick_f32", "gelu_quick.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}}); string_to_spv("silu_f16", "silu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}}); @@ -585,6 +594,20 @@ void process_shaders() { string_to_spv("sigmoid_f16", "sigmoid.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}}); string_to_spv("sigmoid_f32", "sigmoid.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}}); + for (auto rte : {false, true}) { + std::string suffix = rte ? "_rte" : ""; + string_to_spv("geglu_f16" + suffix, "geglu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("geglu_f32" + suffix, "geglu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("reglu_f16" + suffix, "reglu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("reglu_f32" + suffix, "reglu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("swiglu_f16" + suffix, "swiglu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("swiglu_f32" + suffix, "swiglu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("geglu_erf_f16" + suffix, "geglu_erf.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", rte ? 
"1" : "0"}}); + string_to_spv("geglu_erf_f32" + suffix, "geglu_erf.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("geglu_quick_f16" + suffix,"geglu_quick.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"RTE16", rte ? "1" : "0"}}); + string_to_spv("geglu_quick_f32" + suffix,"geglu_quick.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}, {"RTE16", rte ? "1" : "0"}}); + } + string_to_spv("leaky_relu_f32", "leaky_relu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}}); string_to_spv("silu_back_f32", "silu_back.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}}); @@ -635,6 +658,8 @@ void process_shaders() { string_to_spv("conv2d_dw_whcn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"WHCN", "1"}})); string_to_spv("conv2d_dw_cwhn_f32", "conv2d_dw.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}, {"CWHN", "1"}})); + string_to_spv("roll_f32", "roll.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}})); + for (auto &c : compiles) { c.wait(); } @@ -689,11 +714,59 @@ void write_output_files() { std::remove(path.c_str()); } } + + std::string suffixes[2] = {"_f32", "_f16"}; for (const char *op : {"add", "sub", "mul", "div"}) { - fprintf(hdr, "extern unsigned char *%s_data[2][2][2];\n", op); - fprintf(hdr, "extern uint64_t %s_len[2][2][2];\n", op); - fprintf(src, "unsigned char *%s_data[2][2][2] = {{{%s_f32_f32_f32_data, %s_f32_f32_f16_data}, {%s_f32_f16_f32_data, %s_f32_f16_f16_data}}, {{%s_f16_f32_f32_data, %s_f16_f32_f16_data}, {%s_f16_f16_f32_data, %s_f16_f16_f16_data}}};\n", op, op, op, op, op, op, op, op, op); - fprintf(src, "uint64_t %s_len[2][2][2] = {{{%s_f32_f32_f32_len, %s_f32_f32_f16_len}, {%s_f32_f16_f32_len, %s_f32_f16_f16_len}}, {{%s_f16_f32_f32_len, %s_f16_f32_f16_len}, {%s_f16_f16_f32_len, %s_f16_f16_f16_len}}};\n", op, op, op, op, op, op, op, op, op); + 
fprintf(hdr, "extern unsigned char *%s_data[2][2][2][2];\n", op); + fprintf(hdr, "extern uint64_t %s_len[2][2][2][2];\n", op); + std::string data = "unsigned char *" + std::string(op) + "_data[2][2][2][2] = "; + std::string len = "uint64_t " + std::string(op) + "_len[2][2][2][2] = "; + for (uint32_t t0 = 0; t0 < 2; ++t0) { + if (t0 == 0) { + data += "{"; + len += "{"; + } + for (uint32_t t1 = 0; t1 < 2; ++t1) { + if (t1 == 0) { + data += "{"; + len += "{"; + } + for (uint32_t t2 = 0; t2 < 2; ++t2) { + if (t2 == 0) { + data += "{"; + len += "{"; + } + for (uint32_t rte = 0; rte < 2; ++rte) { + if (rte == 0) { + data += "{"; + len += "{"; + } + data += op + suffixes[t0] + suffixes[t1] + suffixes[t2] + ((rte != 0) ? "_rte" : ""); + len += op + suffixes[t0] + suffixes[t1] + suffixes[t2] + ((rte != 0) ? "_rte" : ""); + data += "_data,"; + len += "_len,"; + if (rte == 1) { + data += "}, "; + len += "}, "; + } + } + if (t2 == 1) { + data += "}, "; + len += "}, "; + } + } + if (t1 == 1) { + data += "}, "; + len += "}, "; + } + } + if (t0 == 1) { + data += "};\n"; + len += "};\n"; + } + } + fprintf(src, data.c_str()); + fprintf(src, len.c_str()); } fclose(hdr); fclose(src); diff --git a/ggml/src/ggml-webgpu/CMakeLists.txt b/ggml/src/ggml-webgpu/CMakeLists.txt new file mode 100644 index 0000000000000..79ef68b85a477 --- /dev/null +++ b/ggml/src/ggml-webgpu/CMakeLists.txt @@ -0,0 +1,54 @@ +cmake_minimum_required(VERSION 3.13) + +find_package(Python3 REQUIRED) + +# Shader locations +set(SHADER_DIR "${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders") +set(SHADER_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/generated") +set(SHADER_HEADER "${SHADER_OUTPUT_DIR}/ggml-wgsl-shaders.hpp") +file(MAKE_DIRECTORY ${SHADER_OUTPUT_DIR}) + +message(STATUS "Shader output dir: ${SHADER_OUTPUT_DIR}") + +# Find all WGSL files +file(GLOB WGSL_SHADER_FILES "${SHADER_DIR}/*.wgsl") + +# Generate the header using a Python script +add_custom_command( + OUTPUT ${SHADER_HEADER} + COMMAND ${CMAKE_COMMAND} -E echo 
"Embedding WGSL shaders to ggml-wgsl-shaders.hpp" + COMMAND ${CMAKE_COMMAND} -E make_directory ${SHADER_OUTPUT_DIR} + COMMAND ${CMAKE_COMMAND} -E env PYTHONIOENCODING=utf-8 + ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py + --input "${SHADER_DIR}" + --output "${SHADER_HEADER}" + DEPENDS ${WGSL_SHADER_FILES} ${CMAKE_CURRENT_SOURCE_DIR}/wgsl-shaders/embed_wgsl.py + VERBATIM +) + +add_custom_target(generate_shaders DEPENDS ${SHADER_HEADER}) + +ggml_add_backend_library(ggml-webgpu + ggml-webgpu.cpp + ${SHADER_HEADER} + ../../include/ggml-webgpu.h +) + +add_dependencies(ggml-webgpu generate_shaders) + +if(EMSCRIPTEN) + set(EMDAWNWEBGPU_DIR "" CACHE PATH "Path to emdawnwebgpu_pkg") + + target_compile_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py") + target_link_options(ggml-webgpu PRIVATE "--use-port=${EMDAWNWEBGPU_DIR}/emdawnwebgpu.port.py") +else() + find_package(Dawn REQUIRED) + set(DawnWebGPU_TARGET dawn::webgpu_dawn) +endif() + +if (GGML_WEBGPU_DEBUG) + target_compile_definitions(ggml-webgpu PRIVATE GGML_WEBGPU_DEBUG=1) +endif() + +target_include_directories(ggml-webgpu PRIVATE ${SHADER_OUTPUT_DIR}) +target_link_libraries(ggml-webgpu PRIVATE ${DawnWebGPU_TARGET}) diff --git a/ggml/src/ggml-webgpu/ggml-webgpu.cpp b/ggml/src/ggml-webgpu/ggml-webgpu.cpp new file mode 100644 index 0000000000000..c5abc69343357 --- /dev/null +++ b/ggml/src/ggml-webgpu/ggml-webgpu.cpp @@ -0,0 +1,907 @@ +#include "ggml-webgpu.h" + +#include + +#include "ggml-impl.h" +#include "ggml-backend-impl.h" + +#include "ggml-wgsl-shaders.hpp" + +#include +#include +#include +#include + +#ifdef GGML_WEBGPU_DEBUG +#define WEBGPU_LOG_DEBUG(msg) std::cout << msg << std::endl +#else +#define WEBGPU_LOG_DEBUG(msg) ((void) 0) +#endif // GGML_WEBGPU_DEBUG + +/* Constants */ + +#define WEBGPU_MUL_MAT_WG_SIZE 64 +#define WEBGPU_MUL_MAT_PARAMS_SIZE (13 * sizeof(uint32_t)) // M, N, K, batch sizes, broadcasts +#define WEBGPU_CPY_PARAMS_SIZE (15 * 
sizeof(uint32_t)) // strides and offsets +#define WEBGPU_STORAGE_BUF_BINDING_MULT 4 // a storage buffer binding size must be a multiple of 4 + +/* End Constants */ + +// This is a "fake" base pointer, since WebGPU buffers do not have pointers to their locations. +static void * const webgpu_ptr_base = (void *)(uintptr_t) 0x1000; // NOLINT + +// Always returns the base offset of a tensor, regardless of views. +static uint64_t webgpu_tensor_offset(const ggml_tensor * tensor) { + if (tensor->view_src) { + return (uint8_t *) tensor->view_src->data - (uint8_t *) webgpu_ptr_base; + } + return (uint8_t *) tensor->data - (uint8_t *) webgpu_ptr_base; +} + +/* Struct definitions */ + +// All the base objects needed to run operations on a WebGPU device +struct webgpu_context_struct { + wgpu::Instance instance; + wgpu::Adapter adapter; + wgpu::Device device; + wgpu::Queue queue; + wgpu::Limits limits; + wgpu::SupportedFeatures features; + + std::mutex mutex; + bool device_initialized = false; + + // pipelines and parameter buffers + // TODO: reuse params buffers for different pipelines when possible + wgpu::ComputePipeline memset_pipeline; + wgpu::Buffer memset_params_dev_buf; + wgpu::Buffer memset_params_host_buf; + wgpu::ComputePipeline mul_mat_pipeline; + wgpu::Buffer mul_mat_params_dev_buf; + wgpu::Buffer mul_mat_params_host_buf; + wgpu::ComputePipeline cpy_pipeline; + wgpu::Buffer cpy_params_dev_buf; + wgpu::Buffer cpy_params_host_buf; + + size_t memset_bytes_per_thread; + + // Staging buffer for reading data from the GPU + wgpu::Buffer get_tensor_staging_buf; +}; + +typedef std::shared_ptr webgpu_context; + +struct ggml_backend_webgpu_reg_context { + webgpu_context webgpu_ctx; + + size_t device_count; + const char * name; +}; + +struct ggml_backend_webgpu_device_context { + webgpu_context webgpu_ctx; + + std::string device_name; + std::string device_desc; +}; + +struct ggml_backend_webgpu_context { + webgpu_context webgpu_ctx; + + std::string name; +}; + +struct 
ggml_backend_webgpu_buffer_context { + webgpu_context webgpu_ctx; + + wgpu::Buffer buffer; + + ggml_backend_webgpu_buffer_context(webgpu_context ctx, wgpu::Buffer buf) : + webgpu_ctx(ctx), buffer(buf) { + } +}; + +/* End struct definitions */ + +/* WebGPU object initializations */ + +static void ggml_webgpu_create_pipeline(wgpu::Device &device, wgpu::ComputePipeline &pipeline, const char * shader_code, const char * label, const std::vector &constants = {}) { + WEBGPU_LOG_DEBUG("ggml_webgpu_create_pipeline()"); + wgpu::ShaderSourceWGSL shader_source; + shader_source.code = shader_code; + wgpu::ShaderModuleDescriptor shader_desc; + shader_desc.nextInChain = &shader_source; + wgpu::ShaderModule shader_module = device.CreateShaderModule(&shader_desc); + + wgpu::ComputePipelineDescriptor pipeline_desc; + pipeline_desc.label = label; + pipeline_desc.compute.module = shader_module; + pipeline_desc.compute.entryPoint = "main"; // Entry point in the WGSL code + pipeline_desc.layout = nullptr; // nullptr means auto layout + if (constants.size() > 0) { + pipeline_desc.compute.constants = constants.data(); + pipeline_desc.compute.constantCount = constants.size(); + } + pipeline = device.CreateComputePipeline(&pipeline_desc); +} + +static void ggml_webgpu_create_buffer(wgpu::Device &device, wgpu::Buffer &buffer, size_t size, wgpu::BufferUsage usage, const char* label) { + WEBGPU_LOG_DEBUG("ggml_webgpu_create_buffer()"); + + wgpu::BufferDescriptor buffer_desc; + buffer_desc.size = size; + buffer_desc.usage = usage; + buffer_desc.label = label; + buffer_desc.mappedAtCreation = false; + // TODO: error handling + buffer = device.CreateBuffer(&buffer_desc); +} + +/** End WebGPU object initializations */ + +/** WebGPU Actions */ + +static void ggml_backend_webgpu_map_buffer(webgpu_context ctx, wgpu::Buffer buffer, wgpu::MapMode mode, size_t offset, size_t size) { + ctx->instance.WaitAny(buffer.MapAsync( + mode, offset, size, wgpu::CallbackMode::WaitAnyOnly, + [](wgpu::MapAsyncStatus 
status, wgpu::StringView message) { + if (status != wgpu::MapAsyncStatus::Success) { + GGML_LOG_ERROR("ggml_webgpu: Failed to map buffer: %s\n", message.data); + } + }), + UINT64_MAX + ); +} + +static void ggml_backend_webgpu_buffer_memset(webgpu_context ctx, wgpu::Buffer buf, uint32_t value, size_t offset, size_t size) { + std::lock_guard lock(ctx->mutex); + wgpu::Device device = ctx->device; + + // map the host parameters buffer + ggml_backend_webgpu_map_buffer(ctx, ctx->memset_params_host_buf, wgpu::MapMode::Write, 0, ctx->memset_params_host_buf.GetSize()); + uint32_t * params = (uint32_t *) ctx->memset_params_host_buf.GetMappedRange(); + + params[0] = (uint32_t)offset; + params[1] = (uint32_t)size; + params[2] = value; + ctx->memset_params_host_buf.Unmap(); + + wgpu::BindGroupEntry entries[2]; + entries[0].binding = 0; // binding for the buffer to memset + entries[0].buffer = buf; + entries[0].offset = 0; + entries[0].size = buf.GetSize(); + entries[1].binding = 1; // binding for the parameters + entries[1].buffer = ctx->memset_params_dev_buf; + entries[1].offset = 0; + entries[1].size = ctx->memset_params_dev_buf.GetSize(); + + wgpu::BindGroupDescriptor bind_group_desc; + bind_group_desc.layout = ctx->memset_pipeline.GetBindGroupLayout(0); + bind_group_desc.entryCount = 2; + bind_group_desc.label = "ggml_memset"; + bind_group_desc.entries = entries; + wgpu::BindGroup bind_group = device.CreateBindGroup(&bind_group_desc); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.CopyBufferToBuffer( + ctx->memset_params_host_buf, 0, + ctx->memset_params_dev_buf, 0, + ctx->memset_params_dev_buf.GetSize() + ); + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + pass.SetPipeline(ctx->memset_pipeline); + pass.SetBindGroup(0, bind_group); + size_t bytes_per_wg = ctx->limits.maxComputeWorkgroupSizeX * ctx->memset_bytes_per_thread; + pass.DispatchWorkgroups(((size + 3) + bytes_per_wg - 1) / bytes_per_wg, 1, 1); + pass.End(); + 
wgpu::CommandBuffer commands = encoder.Finish(); + + ctx->queue.Submit(1, &commands); +} + +static void ggml_backend_webgpu_wait_on_submission(webgpu_context ctx) { + // Wait for the queue to finish processing all commands + ctx->instance.WaitAny(ctx->queue.OnSubmittedWorkDone(wgpu::CallbackMode::WaitAnyOnly, + [](wgpu::QueueWorkDoneStatus status, wgpu::StringView message) { + if (status != wgpu::QueueWorkDoneStatus::Success) { + GGML_LOG_ERROR("ggml_webgpu: Failed to wait on queue: %s\n", message.data); + } + }), + UINT64_MAX + ); +} + +/** End WebGPU Actions */ + +/** GGML Backend Interface */ + +static const char * ggml_backend_webgpu_name(ggml_backend_t backend) { + ggml_backend_webgpu_context * ctx = (ggml_backend_webgpu_context *)backend->context; + return ctx->name.c_str(); +} + +static void ggml_backend_webgpu_free(ggml_backend_t backend) { + ggml_backend_webgpu_context * ctx = (ggml_backend_webgpu_context *)backend->context; + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_free(" << ctx->name << ")"); + + // TODO: cleanup + GGML_UNUSED(ctx); +} + +// Returns true if node has enqueued work into the queue, false otherwise +static bool ggml_webgpu_encode_node(webgpu_context ctx, ggml_tensor * node){ + if (ggml_is_empty(node)) { + return false; + } + + WEBGPU_LOG_DEBUG("ggml_webgpu_encode_node(" << node << ", " << ggml_op_name(node->op) << ")"); + + + switch (node->op) { + // no-ops + case GGML_OP_NONE: + case GGML_OP_VIEW: + case GGML_OP_PERMUTE: + return false; + + case GGML_OP_CPY: { + std::lock_guard lock(ctx->mutex); + const ggml_tensor * src = node->src[0]; + ggml_backend_webgpu_buffer_context * src_ctx = (ggml_backend_webgpu_buffer_context *) src->buffer->context; + size_t src_offset = webgpu_tensor_offset(src) + src->view_offs; + // assumes power of 2 offset alignment + size_t src_misalignment = src_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1); + // align to minimum offset alignment + src_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 
1); + ggml_backend_webgpu_buffer_context * dst_ctx = (ggml_backend_webgpu_buffer_context *) node->buffer->context; + size_t dst_offset = webgpu_tensor_offset(node) + node->view_offs; + size_t dst_misalignment = dst_offset & (ctx->limits.minStorageBufferOffsetAlignment - 1); + dst_offset &= ~(ctx->limits.minStorageBufferOffsetAlignment - 1); + + wgpu::Device device = ctx->device; + ggml_backend_webgpu_map_buffer(ctx, ctx->cpy_params_host_buf, + wgpu::MapMode::Write, 0, ctx->cpy_params_host_buf.GetSize()); + uint32_t * params = (uint32_t *) ctx->cpy_params_host_buf.GetMappedRange(); + uint32_t ne = (uint32_t)ggml_nelements(node); + params[0] = ne; + params[1] = src_misalignment/ggml_type_size(src->type); + params[2] = dst_misalignment/ggml_type_size(node->type); + + // Convert byte-strides to element-strides + params[3] = (uint32_t)src->nb[0]/ggml_type_size(src->type); + params[4] = (uint32_t)src->nb[1]/ggml_type_size(src->type); + params[5] = (uint32_t)src->nb[2]/ggml_type_size(src->type); + params[6] = (uint32_t)src->nb[3]/ggml_type_size(src->type); + params[7] = (uint32_t)node->nb[0]/ggml_type_size(node->type); + params[8] = (uint32_t)node->nb[1]/ggml_type_size(node->type); + params[9] = (uint32_t)node->nb[2]/ggml_type_size(node->type); + params[10] = (uint32_t)node->nb[3]/ggml_type_size(node->type); + // Logical shape — same for both tensors even if permuted + params[11] = (uint32_t)(src->ne[0]); + params[12] = (uint32_t)(src->ne[1]); + params[13] = (uint32_t)(src->ne[2]); + params[14] = (uint32_t)(src->ne[3]); + + ctx->cpy_params_host_buf.Unmap(); + + wgpu::BindGroupEntry entries[3]; + entries[0].binding = 0; + entries[0].buffer = src_ctx->buffer; + entries[0].offset = src_offset; + entries[0].size = (ggml_nbytes(src) + src_misalignment + WEBGPU_STORAGE_BUF_BINDING_MULT - 1) & ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1); + + entries[1].binding = 1; + entries[1].buffer = dst_ctx->buffer; + entries[1].offset = dst_offset; + entries[1].size = (ggml_nbytes(node) + 
dst_misalignment + WEBGPU_STORAGE_BUF_BINDING_MULT - 1) & ~(WEBGPU_STORAGE_BUF_BINDING_MULT - 1); + + entries[2].binding = 2; + entries[2].buffer = ctx->cpy_params_dev_buf; + entries[2].offset = 0; + entries[2].size = ctx->cpy_params_dev_buf.GetSize(); + + wgpu::BindGroupDescriptor bind_group_desc; + bind_group_desc.layout = ctx->cpy_pipeline.GetBindGroupLayout(0); + bind_group_desc.label = "ggml_op_cpy"; + bind_group_desc.entryCount = 3; + bind_group_desc.entries = entries; + wgpu::BindGroup bind_group = device.CreateBindGroup(&bind_group_desc); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.CopyBufferToBuffer( + ctx->cpy_params_host_buf, 0, + ctx->cpy_params_dev_buf, 0, + ctx->cpy_params_dev_buf.GetSize() + ); + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + pass.SetPipeline(ctx->cpy_pipeline); + pass.SetBindGroup(0, bind_group); + size_t max_wg_size = ctx->limits.maxComputeWorkgroupSizeX; + pass.DispatchWorkgroups((ne + max_wg_size - 1) / max_wg_size); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + + // TODO, don't submit here, batch submissions + ctx->queue.Submit(1, &commands); + // TODO, don't wait on submission here + ggml_backend_webgpu_wait_on_submission(ctx); + return true; + } + + case GGML_OP_MUL_MAT: + { + const ggml_tensor * src0 = node->src[0]; + ggml_backend_webgpu_buffer_context * src0_ctx = (ggml_backend_webgpu_buffer_context *) src0->buffer->context; + size_t src0_offset = webgpu_tensor_offset(src0) + src0->view_offs; + const ggml_tensor * src1 = node->src[1]; + ggml_backend_webgpu_buffer_context * src1_ctx = (ggml_backend_webgpu_buffer_context *) src1->buffer->context; + size_t src1_offset = webgpu_tensor_offset(src1) + src1->view_offs; + ggml_backend_webgpu_buffer_context * dst_ctx = (ggml_backend_webgpu_buffer_context *) node->buffer->context; + + size_t dst_offset = webgpu_tensor_offset(node) + node->view_offs; + + wgpu::Device device = ctx->device; + + // map the host parameters 
buffer + ggml_backend_webgpu_map_buffer(ctx, ctx->mul_mat_params_host_buf, + wgpu::MapMode::Write, 0, ctx->mul_mat_params_host_buf.GetSize()); + uint32_t * params = (uint32_t *) ctx->mul_mat_params_host_buf.GetMappedRange(); + + params[0] = (uint32_t)node->ne[1]; // number of rows in result (M) + params[1] = (uint32_t)node->ne[0]; // number of columns in result (N) + params[2] = (uint32_t)src0->ne[0]; // number of columns in src0/src1 (K) + + params[3] = (uint32_t)src0->nb[1]/ggml_type_size(src0->type); // stride (elements) of src0 in dimension 1 + params[4] = (uint32_t)src1->nb[1]/ggml_type_size(src1->type); // stride (elements) of src1 in dimension 1 + params[5] = (uint32_t)src0->nb[2]/ggml_type_size(src0->type); // stride (elements) of src0 in dimension 2 + params[6] = (uint32_t)src1->nb[2]/ggml_type_size(src1->type); // stride (elements) of src1 in dimension 2 + params[7] = (uint32_t)src0->nb[3]/ggml_type_size(src0->type); // stride (elements) of src0 in dimension 3 + params[8] = (uint32_t)src1->nb[3]/ggml_type_size(src1->type); // stride (elements) of src1 in dimension 3 + + params[9] = (uint32_t)src0->ne[2]; // batch size in dimension 2 + params[10] = (uint32_t)src0->ne[3]; // batch size in dimension 3 + params[11] = (uint32_t)(src1->ne[2]/src0->ne[2]); // broadcast in dimension 2 + params[12] = (uint32_t)(src1->ne[3]/src0->ne[3]); // broadcast in dimension 3 + + ctx->mul_mat_params_host_buf.Unmap(); + + wgpu::BindGroupEntry entries[4]; + entries[0].binding = 0; + entries[0].buffer = src0_ctx->buffer; + entries[0].offset = src0_offset; + entries[0].size = ggml_nbytes(src0); + + entries[1].binding = 1; + entries[1].buffer = src1_ctx->buffer; + entries[1].offset = src1_offset; + entries[1].size = ggml_nbytes(src1); + + entries[2].binding = 2; + entries[2].buffer = dst_ctx->buffer; + entries[2].offset = dst_offset; + entries[2].size = ggml_nbytes(node); + + entries[3].binding = 3; + entries[3].buffer = ctx->mul_mat_params_dev_buf; + entries[3].offset = 0; + 
entries[3].size = ctx->mul_mat_params_dev_buf.GetSize(); + + wgpu::BindGroupDescriptor bind_group_desc; + bind_group_desc.layout = ctx->mul_mat_pipeline.GetBindGroupLayout(0); + bind_group_desc.entryCount = 4; + bind_group_desc.label = "ggml_op_mul_mat"; + bind_group_desc.entries = entries; + wgpu::BindGroup bind_group = device.CreateBindGroup(&bind_group_desc); + + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.CopyBufferToBuffer( + ctx->mul_mat_params_host_buf, 0, + ctx->mul_mat_params_dev_buf, 0, + ctx->mul_mat_params_dev_buf.GetSize() + ); + wgpu::ComputePassEncoder pass = encoder.BeginComputePass(); + pass.SetPipeline(ctx->mul_mat_pipeline); + pass.SetBindGroup(0, bind_group); + pass.DispatchWorkgroups((node->ne[0] * node->ne[1] * node->ne[2] * node->ne[3] + WEBGPU_MUL_MAT_WG_SIZE - 1) / WEBGPU_MUL_MAT_WG_SIZE); + pass.End(); + wgpu::CommandBuffer commands = encoder.Finish(); + + // TODO, don't submit here, batch submissions + ctx->queue.Submit(1, &commands); + // TODO, don't wait on submission here + ggml_backend_webgpu_wait_on_submission(ctx); + return true; + } + + default: + return false; + } +} + +static ggml_status ggml_backend_webgpu_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_graph_compute(" << cgraph->n_nodes << " nodes)"); + + ggml_backend_webgpu_context * backend_ctx = static_cast(backend->context); + webgpu_context ctx = backend_ctx->webgpu_ctx; + + for (int i = 0; i < cgraph->n_nodes; i++) { + ggml_webgpu_encode_node(ctx, cgraph->nodes[i]); + } + + return GGML_STATUS_SUCCESS; +} + +static ggml_backend_i ggml_backend_webgpu_i = { + /* .get_name = */ ggml_backend_webgpu_name, + /* .free = */ ggml_backend_webgpu_free, + /* .set_tensor_async = */ NULL, + /* .get_tensor_async = */ NULL, + /* .cpy_tensor_async = */ NULL, + /* .synchronize = */ NULL, + /* .graph_plan_create = */ NULL, + /* .graph_plan_free = */ NULL, + /* .graph_plan_update = */ NULL, + /* 
.graph_plan_compute = */ NULL, + /* .graph_compute = */ ggml_backend_webgpu_graph_compute, + /* .event_record = */ NULL, + /* .event_wait = */ NULL, +}; + +/* End GGML Backend Interface */ + +/* GGML Backend Buffer Interface */ + +static void ggml_backend_webgpu_buffer_free_buffer(ggml_backend_buffer_t buffer) { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_free_buffer()"); + ggml_backend_webgpu_buffer_context * ctx = static_cast(buffer->context); + ctx->buffer.Destroy(); +} + +// Returns the "fake" base pointer. +static void * ggml_backend_webgpu_buffer_get_base(ggml_backend_buffer_t buffer) { + GGML_UNUSED(buffer); + return webgpu_ptr_base; +} + +static void ggml_backend_webgpu_buffer_memset_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, uint8_t value, size_t offset, size_t size) { + if (size == 0) { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_memset_tensor: size is zero, nothing to do."); + return; + } + + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_memset_tensor(" << buffer << ", " << tensor << ", " << value << ", " << offset << ", " << size << ")"); + + ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context; + size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset; + // This is a trick to set all bytes of a u32 to the same 1 byte value. 
+ uint32_t val32 = (uint32_t)value * 0x01010101; + ggml_backend_webgpu_buffer_memset(buf_ctx->webgpu_ctx, buf_ctx->buffer, val32, total_offset, size); +} + +static void ggml_backend_webgpu_buffer_set_tensor(ggml_backend_buffer_t buffer, ggml_tensor * tensor, const void * data, size_t offset, size_t size) { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_set_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")"); + ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context; + webgpu_context webgpu_ctx = buf_ctx->webgpu_ctx; + + size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset; + + webgpu_ctx->queue.WriteBuffer(buf_ctx->buffer, total_offset, data, (size/4)*4); + + if (size % 4 != 0) { + // If size is not a multiple of 4, we need to memset the remaining bytes + size_t remaining_size = size % 4; + // pack the remaining bytes into a uint32_t + uint32_t val32 = 0; + for (size_t i = 0; i < remaining_size; i++) { + ((uint8_t *)&val32)[i] = ((const uint8_t *)data)[size - remaining_size + i]; + } + // memset the remaining bytes + ggml_backend_webgpu_buffer_memset(webgpu_ctx, buf_ctx->buffer, val32, total_offset + (size - remaining_size), remaining_size); + } +} + +static void ggml_backend_webgpu_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_get_tensor(" << buffer << ", " << tensor << ", " << data << ", " << offset << ", " << size << ")"); + + ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context; + webgpu_context webgpu_ctx = buf_ctx->webgpu_ctx; + wgpu::Device device = webgpu_ctx->device; + + size_t total_offset = webgpu_tensor_offset(tensor) + tensor->view_offs + offset; + + size_t final_size = size; + if (size % 4 != 0) { + // If size is not a multiple of 4, we need to round it up to the next 
multiple of 4 + final_size = size + (4 - (size % 4)); + } + + std::lock_guard lock(webgpu_ctx->mutex); + + if (webgpu_ctx->get_tensor_staging_buf == nullptr || + webgpu_ctx->get_tensor_staging_buf.GetSize() < final_size) { + // Create a new staging buffer if it doesn't exist or is too small + if (webgpu_ctx->get_tensor_staging_buf) { + webgpu_ctx->get_tensor_staging_buf.Destroy(); + } + ggml_webgpu_create_buffer(device, webgpu_ctx->get_tensor_staging_buf, final_size, + wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::MapRead, "get_tensor_staging_buf"); + } + + // Copy the data from the buffer to the staging buffer + wgpu::CommandEncoder encoder = device.CreateCommandEncoder(); + encoder.CopyBufferToBuffer(buf_ctx->buffer, total_offset, webgpu_ctx->get_tensor_staging_buf, 0, final_size); + wgpu::CommandBuffer commands = encoder.Finish(); + // Submit the command buffer to the queue + webgpu_ctx->queue.Submit(1, &commands); + + // Map the staging buffer to read the data + ggml_backend_webgpu_map_buffer(webgpu_ctx, webgpu_ctx->get_tensor_staging_buf, wgpu::MapMode::Read, 0, final_size); + // Must specify size here since the staging buffer might be larger than the tensor size + const void * mapped_range = webgpu_ctx->get_tensor_staging_buf.GetConstMappedRange(0, final_size); + + // Copy the data from the mapped range to the output buffer + std::memcpy(data, mapped_range, size); + webgpu_ctx->get_tensor_staging_buf.Unmap(); +} + +static void ggml_backend_webgpu_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_clear(" << buffer << ", " << (uint32_t) value << ")"); + + ggml_backend_webgpu_buffer_context * buf_ctx = (ggml_backend_webgpu_buffer_context *) buffer->context; + ggml_backend_webgpu_buffer_memset(buf_ctx->webgpu_ctx, buf_ctx->buffer, value, 0, buffer->size); +} + +static ggml_backend_buffer_i ggml_backend_webgpu_buffer_interface = { + /* .free_buffer = */ ggml_backend_webgpu_buffer_free_buffer, + /* 
.get_base = */ ggml_backend_webgpu_buffer_get_base, + /* .init_tensor = */ NULL, // TODO: optional, needed? + /* .memset_tensor = */ ggml_backend_webgpu_buffer_memset_tensor, + /* .set_tensor = */ ggml_backend_webgpu_buffer_set_tensor, + /* .get_tensor = */ ggml_backend_webgpu_buffer_get_tensor, + /* .cpy_tensor = */ NULL, // TODO: optional, implement this + /* .clear = */ ggml_backend_webgpu_buffer_clear, + /* .reset = */ NULL, // TODO: optional, think it coordinates with .init_tensor +}; + +/* End GGML Backend Buffer Interface */ + +/* GGML Backend Buffer Type Interface */ + +static const char * ggml_backend_webgpu_buffer_type_get_name(ggml_backend_buffer_type_t buft) { + ggml_backend_webgpu_device_context * ctx = static_cast(buft->device->context); + return ctx->device_name.c_str(); +} + +static ggml_backend_buffer_t ggml_backend_webgpu_buffer_type_alloc_buffer(ggml_backend_buffer_type_t buft, size_t size) { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_buffer_type_alloc_buffer(" << size << ")"); + ggml_backend_webgpu_device_context * ctx = static_cast(buft->device->context); + + wgpu::Buffer buf; + ggml_webgpu_create_buffer(ctx->webgpu_ctx->device, buf, size, + wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst, "allocated_buffer"); + + ggml_backend_webgpu_buffer_context * buf_ctx = new ggml_backend_webgpu_buffer_context(ctx->webgpu_ctx, buf); + + return ggml_backend_buffer_init(buft, ggml_backend_webgpu_buffer_interface, buf_ctx, size); +} + +static size_t ggml_backend_webgpu_buffer_type_get_alignment(ggml_backend_buffer_type_t buft) { + ggml_backend_webgpu_device_context * ctx = static_cast(buft->device->context); + return ctx->webgpu_ctx->limits.minStorageBufferOffsetAlignment; +} + +// maxBufferSize might be larger, but you can't bind more than maxStorageBufferBindingSize to a single binding. 
+static size_t ggml_backend_webgpu_buffer_type_get_max_size(ggml_backend_buffer_type_t buft) { + ggml_backend_webgpu_device_context * ctx = static_cast(buft->device->context); + return ctx->webgpu_ctx->limits.maxStorageBufferBindingSize; +} + +/* End GGML Backend Buffer Type Interface */ + +/* GGML Backend Device Interface */ + +static const char * ggml_backend_webgpu_device_get_name(ggml_backend_dev_t dev) { + ggml_backend_webgpu_device_context * ctx = static_cast(dev->context); + return ctx->device_name.c_str(); +} + +static const char * ggml_backend_webgpu_device_get_description(ggml_backend_dev_t dev) { + ggml_backend_webgpu_device_context * ctx = static_cast(dev->context); + return ctx->device_desc.c_str(); +} + +static void ggml_backend_webgpu_device_get_memory(ggml_backend_dev_t dev, size_t * free, size_t * total) { + ggml_backend_webgpu_device_context * ctx = static_cast(dev->context); + // TODO: what do we actually want to return here? maxBufferSize might not be the full available memory. 
+ *free = ctx->webgpu_ctx->limits.maxBufferSize; + *total = ctx->webgpu_ctx->limits.maxBufferSize; +} + +static enum ggml_backend_dev_type ggml_backend_webgpu_device_get_type(ggml_backend_dev_t dev) { + GGML_UNUSED(dev); + return GGML_BACKEND_DEVICE_TYPE_GPU; +} + +static void ggml_backend_webgpu_device_get_props(ggml_backend_dev_t dev, struct ggml_backend_dev_props * props) { + props->name = ggml_backend_webgpu_device_get_name(dev); + props->description = ggml_backend_webgpu_device_get_description(dev); + props->type = ggml_backend_webgpu_device_get_type(dev); + ggml_backend_webgpu_device_get_memory(dev, &props->memory_free, &props->memory_total); + props->caps = { + /* .async = */ false, + /* .host_buffer = */ false, + /* .buffer_from_host_ptr = */ false, + /* .events = */ false, + }; +} + +static ggml_guid_t ggml_backend_webgpu_guid(void) { + static const char * guid_str = "__ggml_webgpu :)"; + return reinterpret_cast((void *)guid_str); +} + +static void ggml_webgpu_init_memset_pipeline(webgpu_context webgpu_ctx) { + // we use the maximum workgroup size for the memset pipeline + size_t max_wg_size = webgpu_ctx->limits.maxComputeWorkgroupSizeX; + size_t max_threads = max_wg_size * webgpu_ctx->limits.maxComputeWorkgroupsPerDimension; + // Size the bytes_per_thread so that the largest buffer size can be handled + webgpu_ctx->memset_bytes_per_thread = (webgpu_ctx->limits.maxStorageBufferBindingSize + max_threads - 1) / max_threads; + std::vector constants(2); + constants[0].key = "wg_size"; + constants[0].value = max_wg_size; + constants[1].key = "bytes_per_thread"; + constants[1].value = webgpu_ctx->memset_bytes_per_thread; + ggml_webgpu_create_pipeline(webgpu_ctx->device, webgpu_ctx->memset_pipeline, wgsl_memset, "memset", constants); + ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_dev_buf, + 3 * sizeof(uint32_t), // 3 parameters: buffer size, offset, value + wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst, 
"memset_params_dev_buf"); + ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->memset_params_host_buf, + 3 * sizeof(uint32_t), wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc, "memset_params_host_buf"); +} + +static void ggml_webgpu_init_mul_mat_pipeline(webgpu_context webgpu_ctx) { + ggml_webgpu_create_pipeline(webgpu_ctx->device, webgpu_ctx->mul_mat_pipeline, wgsl_mul_mat, "mul_mat"); + ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->mul_mat_params_dev_buf, WEBGPU_MUL_MAT_PARAMS_SIZE, + wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst, "mul_mat_params_dev_buf"); + ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->mul_mat_params_host_buf, WEBGPU_MUL_MAT_PARAMS_SIZE, + wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc, "mul_mat_params_host_buf"); +} + +static void ggml_webgpu_init_cpy_pipeline(webgpu_context webgpu_ctx) { + std::vector constants(1); + constants[0].key = "wg_size"; + constants[0].value = webgpu_ctx->limits.maxComputeWorkgroupSizeX; + + ggml_webgpu_create_pipeline(webgpu_ctx->device, webgpu_ctx->cpy_pipeline, wgsl_cpy, "cpy", constants); + ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->cpy_params_dev_buf, WEBGPU_CPY_PARAMS_SIZE, + wgpu::BufferUsage::Uniform | wgpu::BufferUsage::CopyDst, "cpy_params_dev_buf"); + ggml_webgpu_create_buffer(webgpu_ctx->device, webgpu_ctx->cpy_params_host_buf, WEBGPU_CPY_PARAMS_SIZE, + wgpu::BufferUsage::MapWrite | wgpu::BufferUsage::CopySrc, "cpy_params_host_buf"); +} + +// TODO: Make thread safe if multiple devices are used +static ggml_backend_t ggml_backend_webgpu_device_init(ggml_backend_dev_t dev, const char * params) { + GGML_UNUSED(params); + + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_device_init()"); + + ggml_backend_webgpu_device_context * dev_ctx = static_cast(dev->context); + webgpu_context webgpu_ctx = dev_ctx->webgpu_ctx; + + std::lock_guard lock(webgpu_ctx->mutex); + + if (!webgpu_ctx->device_initialized) { + // Initialize device + 
wgpu::DeviceDescriptor dev_desc; + dev_desc.requiredLimits = &webgpu_ctx->limits; + dev_desc.requiredFeatures = webgpu_ctx->features.features; + dev_desc.requiredFeatureCount = webgpu_ctx->features.featureCount; + dev_desc.SetDeviceLostCallback(wgpu::CallbackMode::AllowSpontaneous, + [](const wgpu::Device& device, wgpu::DeviceLostReason reason, wgpu::StringView message) { + GGML_UNUSED(device); + GGML_LOG_ERROR("ggml_webgpu: Device lost! Reason: %d, Message: %s\n", static_cast(reason), message.data); + }); + dev_desc.SetUncapturedErrorCallback( + [](const wgpu::Device& device, wgpu::ErrorType reason, wgpu::StringView message) { + GGML_UNUSED(device); + GGML_LOG_ERROR("ggml_webgpu: Device error! Reason: %d, Message: %s\n", static_cast(reason), message.data); + }); + webgpu_ctx->instance.WaitAny(webgpu_ctx->adapter.RequestDevice(&dev_desc, wgpu::CallbackMode::WaitAnyOnly, + [webgpu_ctx](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message) { + if (status != wgpu::RequestDeviceStatus::Success) { + GGML_LOG_ERROR("ggml_webgpu: Failed to get a device: %s\n", message.data); + return; + } + webgpu_ctx->device = device; + }), + UINT64_MAX + ); + GGML_ASSERT(webgpu_ctx->device != nullptr); + + // Initialize (compute) queue + webgpu_ctx->queue = webgpu_ctx->device.GetQueue(); + + ggml_webgpu_init_memset_pipeline(webgpu_ctx); + ggml_webgpu_init_mul_mat_pipeline(webgpu_ctx); + ggml_webgpu_init_cpy_pipeline(webgpu_ctx); + webgpu_ctx->device_initialized = true; + } + + static ggml_backend_webgpu_context backend_ctx; + backend_ctx.name = GGML_WEBGPU_NAME + std::string(": ") + dev_ctx->device_name; + backend_ctx.webgpu_ctx = webgpu_ctx; + + // See GGML Backend Interface section + static ggml_backend backend = { + /* .guid = */ ggml_backend_webgpu_guid(), + /* .interface = */ ggml_backend_webgpu_i, + /* .device = */ dev, + /* .context = */ &backend_ctx, + }; + + return &backend; +} + +static ggml_backend_buffer_type_t 
ggml_backend_webgpu_device_get_buffer_type(ggml_backend_dev_t dev) { + // See GGML Backend Buffer Type Interface section + static struct ggml_backend_buffer_type ggml_backend_webgpu_buffer_type = { + /* .iface = */ { + /* .get_name = */ ggml_backend_webgpu_buffer_type_get_name, + /* .alloc_buffer = */ ggml_backend_webgpu_buffer_type_alloc_buffer, + /* .get_alignment = */ ggml_backend_webgpu_buffer_type_get_alignment, + /* .get_max_size = */ ggml_backend_webgpu_buffer_type_get_max_size, + /* .get_alloc_size = */ NULL, // defaults to ggml_nbytes + /* .is_host = */ NULL, // defaults to false + }, + /* .device = */ dev, + /* .context = */ NULL, + }; + + return &ggml_backend_webgpu_buffer_type; +} + +static bool ggml_backend_webgpu_device_supports_buft(ggml_backend_dev_t dev, ggml_backend_buffer_type_t buft) { + GGML_UNUSED(dev); + return buft->iface.get_name == ggml_backend_webgpu_buffer_type_get_name; +} + +static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) { + GGML_UNUSED(dev); + + switch (op->op) { + case GGML_OP_NONE: + case GGML_OP_VIEW: + case GGML_OP_PERMUTE: + return true; + case GGML_OP_CPY: + return op->type == GGML_TYPE_F16 && op->src[0]->type == GGML_TYPE_F32; + case GGML_OP_MUL_MAT: + return op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32; + default: + return false; + } +} + +static struct ggml_backend_device_i ggml_backend_webgpu_device_i = { + /* .get_name = */ ggml_backend_webgpu_device_get_name, + /* .get_description = */ ggml_backend_webgpu_device_get_description, + /* .get_memory = */ ggml_backend_webgpu_device_get_memory, + /* .get_type = */ ggml_backend_webgpu_device_get_type, + /* .get_props = */ ggml_backend_webgpu_device_get_props, + /* .init_backend = */ ggml_backend_webgpu_device_init, + /* .get_buffer_type = */ ggml_backend_webgpu_device_get_buffer_type, + /* .get_host_buffer_type = */ NULL, + /* .buffer_from_host_ptr = */ NULL, + /* .supports_op = */ 
ggml_backend_webgpu_device_supports_op, + /* .supports_buft = */ ggml_backend_webgpu_device_supports_buft, + /* .offload_op = */ NULL, + /* .event_new = */ NULL, + /* .event_free = */ NULL, + /* .event_synchronize = */ NULL, +}; + +/* End GGML Backend Device Interface */ + +/* GGML Backend Registration Interface */ + +static const char * ggml_backend_webgpu_reg_get_name(ggml_backend_reg_t reg) { + ggml_backend_webgpu_reg_context * ctx = static_cast(reg->context); + return ctx->name; +} + +static size_t ggml_backend_webgpu_reg_get_device_count(ggml_backend_reg_t reg) { + ggml_backend_webgpu_reg_context * ctx = static_cast(reg->context); + return ctx->device_count; +} + +// TODO: Does this need to be thread safe? Is it only called once? +// Only one device is supported for now +static ggml_backend_dev_t ggml_backend_webgpu_reg_get_device(ggml_backend_reg_t reg, size_t index) { + GGML_ASSERT(index == 0); + WEBGPU_LOG_DEBUG("ggml_backend_reg_get_device()"); + + ggml_backend_webgpu_reg_context * reg_ctx = static_cast(reg->context); + + webgpu_context ctx = reg_ctx->webgpu_ctx; + + wgpu::RequestAdapterOptions options = {}; + auto callback = [](wgpu::RequestAdapterStatus status, wgpu::Adapter adapter, const char *message, void *userdata) { + if (status != wgpu::RequestAdapterStatus::Success) { + GGML_LOG_ERROR("ggml_webgpu: Failed to get an adapter: %s\n", message); + return; + } + *static_cast(userdata) = adapter; + }; + void *userdata = &ctx->adapter; + ctx->instance.WaitAny(ctx->instance.RequestAdapter(&options, wgpu::CallbackMode::WaitAnyOnly, callback, userdata), UINT64_MAX); + GGML_ASSERT(ctx->adapter != nullptr); + + ctx->adapter.GetLimits(&ctx->limits); + ctx->adapter.GetFeatures(&ctx->features); + + wgpu::AdapterInfo info{}; + ctx->adapter.GetInfo(&info); + + static ggml_backend_webgpu_device_context device_ctx; + device_ctx.webgpu_ctx = ctx; + device_ctx.device_name = GGML_WEBGPU_NAME; + device_ctx.device_desc = std::string(info.description.data); + + 
GGML_LOG_INFO("ggml_webgpu: adapter_info: vendor_id: %u | vendor: %s | architecture: %s | device_id: %u | name: %s | device_desc: %s\n", + info.vendorID, info.vendor.data, info.architecture.data, info.deviceID, info.device.data, info.description.data); + + // See GGML Backend Device Interface section + static ggml_backend_device device = { + /* .iface = */ ggml_backend_webgpu_device_i, + /* .reg = */ reg, + /* .context = */ &device_ctx, + }; + return &device; +} + + +static const struct ggml_backend_reg_i ggml_backend_webgpu_reg_i = { + /* .get_name = */ ggml_backend_webgpu_reg_get_name, + /* .get_device_count = */ ggml_backend_webgpu_reg_get_device_count, + /* .get_device = */ ggml_backend_webgpu_reg_get_device, + /* .get_proc_address = */ NULL, +}; + +/* End GGML Backend Registration Interface */ + +// TODO: Does this need to be thread safe? Is it only called once? +ggml_backend_reg_t ggml_backend_webgpu_reg() { + WEBGPU_LOG_DEBUG("ggml_backend_webgpu_reg()"); + + webgpu_context webgpu_ctx = std::make_shared(); + webgpu_ctx->device_initialized = false; + + static ggml_backend_webgpu_reg_context ctx; + ctx.webgpu_ctx = webgpu_ctx; + ctx.name = GGML_WEBGPU_NAME; + ctx.device_count = 1; + + wgpu::InstanceDescriptor instance_descriptor{}; + std::vector instance_features = {wgpu::InstanceFeatureName::TimedWaitAny}; + instance_descriptor.requiredFeatures = instance_features.data(); + instance_descriptor.requiredFeatureCount = instance_features.size(); + webgpu_ctx->instance = wgpu::CreateInstance(&instance_descriptor); + GGML_ASSERT(webgpu_ctx->instance != nullptr); + + static ggml_backend_reg reg = { + /* .api_version = */ GGML_BACKEND_API_VERSION, + /* .iface = */ ggml_backend_webgpu_reg_i, + /* .context = */ &ctx, + }; + return ® +} + +ggml_backend_t ggml_backend_webgpu_init(void) { + ggml_backend_dev_t dev = ggml_backend_reg_dev_get(ggml_backend_webgpu_reg(), 0); + + return ggml_backend_webgpu_device_init(dev, nullptr); +} + 
+GGML_BACKEND_DL_IMPL(ggml_backend_webgpu_reg) diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl new file mode 100644 index 0000000000000..6fe924c554cc3 --- /dev/null +++ b/ggml/src/ggml-webgpu/wgsl-shaders/cpy.wgsl @@ -0,0 +1,60 @@ +enable f16; + +@group(0) @binding(0) +var src: array; + +@group(0) @binding(1) +var dst: array; + +struct Params { + ne: u32, // total number of elements + offset_src: u32, // in elements + offset_dst: u32, // in elements + + // Strides (in elements) — may be permuted + stride_src0: u32, + stride_src1: u32, + stride_src2: u32, + stride_src3: u32, + + stride_dst0: u32, + stride_dst1: u32, + stride_dst2: u32, + stride_dst3: u32, + + // Logical shape (same for both tensors) + ne0: u32, + ne1: u32, + ne2: u32, + ne3: u32, +}; + +@group(0) @binding(2) +var params: Params; + +override wg_size: u32; +@compute @workgroup_size(wg_size) +fn main(@builtin(global_invocation_id) gid: vec3) { + if (gid.x >= params.ne) { + return; + } + + var i = gid.x; + + let i3 = i / (params.ne2 * params.ne1 * params.ne0); + i = i % (params.ne2 * params.ne1 * params.ne0); + + let i2 = i / (params.ne1 * params.ne0); + i = i % (params.ne1 * params.ne0); + + let i1 = i / params.ne0; + let i0 = i % params.ne0; + + let src_idx = i0 * params.stride_src0 + i1 * params.stride_src1 + + i2 * params.stride_src2 + i3 * params.stride_src3; + + let dst_idx = i0 * params.stride_dst0 + i1 * params.stride_dst1 + + i2 * params.stride_dst2 + i3 * params.stride_dst3; + + dst[params.offset_dst + dst_idx] = f16(src[params.offset_src + src_idx]); +} diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py b/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py new file mode 100755 index 0000000000000..962dcd6b170ed --- /dev/null +++ b/ggml/src/ggml-webgpu/wgsl-shaders/embed_wgsl.py @@ -0,0 +1,35 @@ +import os +import argparse + + +def escape_triple_quotes(wgsl): + # Simple defense in case of embedded """ + return wgsl.replace('"""', 
'\\"""') + + +def to_cpp_string_literal(varname, content): + return f'const char* wgsl_{varname} = R"({content})";\n' + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument('--input', required=True) + parser.add_argument('--output', required=True) + args = parser.parse_args() + + with open(args.output, 'w', encoding='utf-8') as out: + out.write("// Auto-generated shader embedding \n\n") + for fname in sorted(os.listdir(args.input)): + if not fname.endswith('.wgsl'): + continue + shader_path = os.path.join(args.input, fname) + varname = os.path.splitext(fname)[0] + with open(shader_path, 'r', encoding='utf-8') as f: + content = f.read() + content = escape_triple_quotes(content) + out.write(to_cpp_string_literal(varname, content)) + out.write('\n') + + +if __name__ == '__main__': + main() diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl new file mode 100644 index 0000000000000..cb7c8c3e09e91 --- /dev/null +++ b/ggml/src/ggml-webgpu/wgsl-shaders/memset.wgsl @@ -0,0 +1,40 @@ +@group(0) @binding(0) +var output_buffer: array; + +struct Params { + offset: u32, // in bytes + size: u32, // in bytes + value: u32, // 4 8-bit values, which are either repeating (memset_tensor) or may be separate (cleaning up unaligned set_tensor operations) +}; + +@group(0) @binding(1) +var params: Params; + +override wg_size: u32; +override bytes_per_thread: u32; + +@compute @workgroup_size(wg_size) +fn main(@builtin(global_invocation_id) gid: vec3) { + let i = gid.x * bytes_per_thread; + let start = params.offset; + let end = params.offset + params.size; + + for (var j: u32 = 0u; j < bytes_per_thread; j = j + 1u) { + let byte_index = start + i + j; + if (byte_index + 4u <= end) { + output_buffer[(byte_index >> 2u)] = params.value; + } else { + // Handle tail (unaligned) + for (var k: u32 = 0u; k < 4u; k = k + 1u) { + let idx = byte_index + k; + if (idx < end) { + let word_idx = idx >> 2u; + let byte_offset = (idx & 
3u) * 8u; + let mask = ~(0xffu << byte_offset); + let existing = output_buffer[word_idx]; + output_buffer[word_idx] = (existing & mask) | ((params.value & 0xffu) << byte_offset); + } + } + } + } +} diff --git a/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl b/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl new file mode 100644 index 0000000000000..054aab566f96b --- /dev/null +++ b/ggml/src/ggml-webgpu/wgsl-shaders/mul_mat.wgsl @@ -0,0 +1,56 @@ +struct MulMatParams { + m: u32, + n: u32, + k: u32, + // all strides are in elements + stride_01: u32, + stride_11: u32, + stride_02: u32, + stride_12: u32, + stride_03: u32, + stride_13: u32, + + bs02: u32, + bs03: u32, + broadcast2: u32, + broadcast3: u32 +}; + +@group(0) @binding(0) var src0: array; // N rows, K columns +@group(0) @binding(1) var src1: array; // M rows, K columns (transposed) +@group(0) @binding(2) var dst: array; // M rows, N columns + +@group(0) @binding(3) var params: MulMatParams; + +@compute @workgroup_size(64) +fn main(@builtin(global_invocation_id) global_id: vec3) { + let total = params.m * params.n * params.bs02 * params.broadcast2 * params.bs03 * params.broadcast3; + if (global_id.x >= total) { + return; + } + + let dst2_stride = params.m * params.n; + let dst3_stride = dst2_stride * params.bs02 * params.broadcast2; + + let dst3_idx = global_id.x / dst3_stride; + let src03_idx = dst3_idx / params.broadcast3; // src0 may be broadcast along the third dimension + let src13_idx = dst3_idx; // src1 is not broadcast + let dst3_rem = global_id.x % dst3_stride; + + let dst2_idx = dst3_rem / dst2_stride; + let src02_idx = dst2_idx / params.broadcast2; // src0 may also be broadcast along the second dimension + let src12_idx = dst2_idx; // src1 is not broadcast + + let dst2_rem = dst3_rem % dst2_stride; + + let row = dst2_rem / params.n; // output row + let col = dst2_rem % params.n; // output column + + var sum = 0.0; + for (var i: u32 = 0u; i < params.k; i = i + 1u) { + let src0_idx = src03_idx * 
params.stride_03 + src02_idx * params.stride_02 + col * params.stride_01 + i; + let src1_idx = src13_idx * params.stride_13 + src12_idx * params.stride_12 + row * params.stride_11 + i; + sum = sum + src0[src0_idx] * src1[src1_idx]; + } + dst[dst3_idx * dst3_stride + dst2_idx * dst2_stride + row * params.n + col] = sum; +} diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index 196b7b8f3e2ae..5ae1c527df639 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -61,9 +61,6 @@ #define m512i(p) (__m512i)(p) #endif -// precomputed f32 table for f16 (256 KB) (ggml-impl.h) -float ggml_table_f32_f16[1 << 16]; - #if defined(__linux__) || \ defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ (defined(__APPLE__) && !TARGET_OS_TV && !TARGET_OS_WATCH) @@ -205,19 +202,34 @@ void ggml_print_backtrace(void) { } #endif +static ggml_abort_callback_t g_abort_callback = NULL; + +// Set the abort callback (passing null will restore original abort functionality: printing a message to stdout) +GGML_API ggml_abort_callback_t ggml_set_abort_callback(ggml_abort_callback_t callback) { + ggml_abort_callback_t ret_val = g_abort_callback; + g_abort_callback = callback; + return ret_val; +} + void ggml_abort(const char * file, int line, const char * fmt, ...) 
{ fflush(stdout); - fprintf(stderr, "%s:%d: ", file, line); + char message[2048]; + int offset = snprintf(message, sizeof(message), "%s:%d: ", file, line); va_list args; va_start(args, fmt); - vfprintf(stderr, fmt, args); + vsnprintf(message + offset, sizeof(message) - offset, fmt, args); va_end(args); - fprintf(stderr, "\n"); + if (g_abort_callback) { + g_abort_callback(message); + } else { + // default: print error and backtrace to stderr + fprintf(stderr, "%s\n", message); + ggml_print_backtrace(); + } - ggml_print_backtrace(); abort(); } @@ -461,6 +473,14 @@ bool ggml_guid_matches(ggml_guid_t guid_a, ggml_guid_t guid_b) { return memcmp(guid_a, guid_b, sizeof(ggml_guid)) == 0; } +const char * ggml_version(void) { + return GGML_VERSION; +} + +const char * ggml_commit(void) { + return GGML_COMMIT; +} + // // timing // @@ -888,12 +908,6 @@ struct ggml_context { struct ggml_object * objects_end; }; -struct ggml_context_container { - bool used; - - struct ggml_context context; -}; - // // data types // @@ -942,6 +956,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "TRANSPOSE", "GET_ROWS", "GET_ROWS_BACK", + "SET_ROWS", "DIAG", "DIAG_MASK_INF", "DIAG_MASK_ZERO", @@ -953,6 +968,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CONV_TRANSPOSE_1D", "IM2COL", "IM2COL_BACK", + "CONV_2D", "CONV_2D_DW", "CONV_TRANSPOSE_2D", "POOL_1D", @@ -961,6 +977,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "UPSCALE", "PAD", "PAD_REFLECT_1D", + "ROLL", "ARANGE", "TIMESTEP_EMBEDDING", "ARGSORT", @@ -989,9 +1006,11 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = { "CROSS_ENTROPY_LOSS", "CROSS_ENTROPY_LOSS_BACK", "OPT_STEP_ADAMW", + + "GLU", }; -static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82"); +static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86"); static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "none", @@ -1037,6 +1056,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "transpose(x)", "get_rows(x)", "get_rows_back(x)", + 
"set_rows(x)", "diag(x)", "diag_mask_inf(x)", "diag_mask_zero(x)", @@ -1048,6 +1068,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "conv_transpose_1d(x)", "im2col(x)", "im2col_back(x)", + "conv_2d(x)", "conv_2d_dw(x)", "conv_transpose_2d(x)", "pool_1d(x)", @@ -1056,6 +1077,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "upscale(x)", "pad(x)", "pad_reflect_1d(x)", + "roll(x)", "arange(start, stop, step)", "timestep_embedding(timesteps, dim, max_period)", "argsort(x)", @@ -1084,9 +1106,11 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = { "cross_entropy_loss(x,y)", "cross_entropy_loss_back(x,y)", "adamw(x)", + + "glu(x)", }; -static_assert(GGML_OP_COUNT == 82, "GGML_OP_COUNT != 82"); +static_assert(GGML_OP_COUNT == 86, "GGML_OP_COUNT != 86"); static_assert(GGML_OP_POOL_COUNT == 2, "GGML_OP_POOL_COUNT != 2"); @@ -1112,6 +1136,17 @@ static const char * GGML_UNARY_OP_NAME[GGML_UNARY_OP_COUNT] = { static_assert(GGML_UNARY_OP_COUNT == 15, "GGML_UNARY_OP_COUNT != 15"); +static const char * GGML_GLU_OP_NAME[GGML_GLU_OP_COUNT] = { + "REGLU", + "GEGLU", + "SWIGLU", + "GEGLU_ERF", + "GEGLU_QUICK", +}; + +static_assert(GGML_GLU_OP_COUNT == 5, "GGML_GLU_OP_COUNT != 5"); + + static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN"); static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN"); @@ -1214,11 +1249,19 @@ const char * ggml_unary_op_name(enum ggml_unary_op op) { return GGML_UNARY_OP_NAME[op]; } +const char * ggml_glu_op_name(enum ggml_glu_op op) { + return GGML_GLU_OP_NAME[op]; +} + const char * ggml_op_desc(const struct ggml_tensor * t) { if (t->op == GGML_OP_UNARY) { enum ggml_unary_op uop = ggml_get_unary_op(t); return ggml_unary_op_name(uop); } + if (t->op == GGML_OP_GLU) { + enum ggml_glu_op gop = ggml_get_glu_op(t); + return ggml_glu_op_name(gop); + } return ggml_op_name(t->op); } @@ -1355,6 +1398,12 @@ bool 
ggml_is_contiguous_channels(const struct ggml_tensor * tensor) { tensor->nb[2] == ggml_type_size(tensor->type); } +bool ggml_is_contiguous_rows(const struct ggml_tensor * tensor) { + return + tensor->ne[0] == ggml_blck_size(tensor->type) || + tensor->nb[0] == ggml_type_size(tensor->type); +} + static inline bool ggml_is_padded_1d(const struct ggml_tensor * tensor) { static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function"); @@ -1426,14 +1475,6 @@ struct ggml_context * ggml_init(struct ggml_init_params params) { // initialize time system (required on Windows) ggml_time_init(); - for (int i = 0; i < (1 << 16); ++i) { - union { - uint16_t u16; - ggml_fp16_t fp16; - } u = {i}; - ggml_table_f32_f16[i] = GGML_COMPUTE_FP16_TO_FP32(u.fp16); - } - is_first_call = false; } @@ -1737,6 +1778,11 @@ enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) { return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0); } +enum ggml_glu_op ggml_get_glu_op(const struct ggml_tensor * tensor) { + GGML_ASSERT(tensor->op == GGML_OP_GLU); + return (enum ggml_glu_op) ggml_get_op_params_i32(tensor, 0); +} + const char * ggml_get_name(const struct ggml_tensor * tensor) { return tensor->name; } @@ -2616,6 +2662,156 @@ struct ggml_tensor * ggml_exp_inplace( return ggml_unary_inplace(ctx, a, GGML_UNARY_OP_EXP); } +// ggml_glu + +static struct ggml_tensor * ggml_glu_impl( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_glu_op op, + bool swapped) { + GGML_ASSERT(ggml_is_contiguous_1(a)); + + if (b) { + GGML_ASSERT(ggml_is_contiguous_1(b)); + GGML_ASSERT(ggml_are_same_shape(a, b)); + GGML_ASSERT(a->type == b->type); + } + + int64_t ne[GGML_MAX_DIMS] = { a->ne[0] / 2 }; for (int i = 1; i < GGML_MAX_DIMS; i++) ne[i] = a->ne[i]; + struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, GGML_MAX_DIMS, b ? 
a->ne : ne, NULL, 0); + + ggml_set_op_params_i32(result, 0, (int32_t) op); + ggml_set_op_params_i32(result, 1, (int32_t) swapped); + + result->op = GGML_OP_GLU; + result->src[0] = a; + result->src[1] = b; + + return result; +} + +struct ggml_tensor * ggml_glu( + struct ggml_context * ctx, + struct ggml_tensor * a, + enum ggml_glu_op op, + bool swapped) { + return ggml_glu_impl(ctx, a, NULL, op, swapped); +} + +struct ggml_tensor * ggml_glu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + enum ggml_glu_op op) { + return ggml_glu_impl(ctx, a, b, op, false); +} + +// ggml_reglu + +struct ggml_tensor * ggml_reglu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, false); +} + +struct ggml_tensor * ggml_reglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_REGLU, true); +} + +struct ggml_tensor * ggml_reglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_REGLU, false); +} + +// ggml_geglu + +struct ggml_tensor * ggml_geglu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, false); +} + +struct ggml_tensor * ggml_geglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU, true); +} + +struct ggml_tensor * ggml_geglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU, false); +} + +// ggml_swiglu + +struct ggml_tensor * ggml_swiglu( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_SWIGLU, false); +} + +struct ggml_tensor * ggml_swiglu_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, 
GGML_GLU_OP_SWIGLU, true); +} + +struct ggml_tensor * ggml_swiglu_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_SWIGLU, false); +} + +// ggml_geglu_erf + +struct ggml_tensor * ggml_geglu_erf( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, false); +} + +struct ggml_tensor * ggml_geglu_erf_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_ERF, true); +} + +struct ggml_tensor * ggml_geglu_erf_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_ERF, false); +} + +// ggml_geglu_quick + +struct ggml_tensor * ggml_geglu_quick( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, false); +} + +struct ggml_tensor * ggml_geglu_quick_swapped( + struct ggml_context * ctx, + struct ggml_tensor * a) { + return ggml_glu_impl(ctx, a, NULL, GGML_GLU_OP_GEGLU_QUICK, true); +} + +struct ggml_tensor * ggml_geglu_quick_split( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b) { + return ggml_glu_impl(ctx, a, b, GGML_GLU_OP_GEGLU_QUICK, false); +} + // ggml_norm static struct ggml_tensor * ggml_norm_impl( @@ -2873,12 +3069,14 @@ static struct ggml_tensor * ggml_scale_impl( struct ggml_context * ctx, struct ggml_tensor * a, float s, + float b, bool inplace) { GGML_ASSERT(ggml_is_padded_1d(a)); struct ggml_tensor * result = inplace ? 
ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a); - ggml_set_op_params(result, &s, sizeof(s)); + float params[2] = { s, b }; + ggml_set_op_params(result, &params, sizeof(params)); result->op = GGML_OP_SCALE; result->src[0] = a; @@ -2890,14 +3088,30 @@ struct ggml_tensor * ggml_scale( struct ggml_context * ctx, struct ggml_tensor * a, float s) { - return ggml_scale_impl(ctx, a, s, false); + return ggml_scale_impl(ctx, a, s, 0.0, false); } struct ggml_tensor * ggml_scale_inplace( struct ggml_context * ctx, struct ggml_tensor * a, float s) { - return ggml_scale_impl(ctx, a, s, true); + return ggml_scale_impl(ctx, a, s, 0.0, true); +} + +struct ggml_tensor * ggml_scale_bias( + struct ggml_context * ctx, + struct ggml_tensor * a, + float s, + float b) { + return ggml_scale_impl(ctx, a, s, b, false); +} + +struct ggml_tensor * ggml_scale_bias_inplace( + struct ggml_context * ctx, + struct ggml_tensor * a, + float s, + float b) { + return ggml_scale_impl(ctx, a, s, b, true); } // ggml_set @@ -3399,6 +3613,35 @@ struct ggml_tensor * ggml_get_rows_back( return result; } +// ggml_set_rows + +struct ggml_tensor * ggml_set_rows( + struct ggml_context * ctx, + struct ggml_tensor * a, + struct ggml_tensor * b, + struct ggml_tensor * c) { + GGML_ASSERT(a->ne[0] == b->ne[0]); + GGML_ASSERT(a->ne[2] == b->ne[2]); + GGML_ASSERT(a->ne[3] == b->ne[3]); + GGML_ASSERT(b->ne[1] == c->ne[0]); + GGML_ASSERT(b->ne[2] % c->ne[1] == 0); + GGML_ASSERT(b->ne[3] % c->ne[2] == 0); + GGML_ASSERT(c->ne[3] == 1); + GGML_ASSERT(b->type == GGML_TYPE_F32); + GGML_ASSERT(c->type == GGML_TYPE_I64); + + GGML_ASSERT(ggml_is_contiguous_rows(a)); + GGML_ASSERT(ggml_is_contiguous_rows(b)); + + struct ggml_tensor * result = ggml_view_tensor(ctx, a); + + result->op = GGML_OP_SET_ROWS; + result->src[0] = b; + result->src[1] = c; + + return result; +} + // ggml_diag struct ggml_tensor * ggml_diag( @@ -3493,9 +3736,10 @@ static struct ggml_tensor * ggml_soft_max_impl( if (mask) { GGML_ASSERT(mask->type == 
GGML_TYPE_F16 || mask->type == GGML_TYPE_F32); GGML_ASSERT(ggml_is_contiguous(mask)); - GGML_ASSERT(ggml_is_matrix(mask)); GGML_ASSERT(mask->ne[0] == a->ne[0]); GGML_ASSERT(mask->ne[1] >= a->ne[1]); + GGML_ASSERT(a->ne[2]%mask->ne[2] == 0); + GGML_ASSERT(a->ne[3]%mask->ne[3] == 0); } if (max_bias > 0.0f) { @@ -4135,6 +4379,44 @@ struct ggml_tensor * ggml_conv_2d_dw_direct( return result; } +// ggml_conv_2d_direct + +struct ggml_tensor * ggml_conv_2d_direct( + struct ggml_context * ctx, + struct ggml_tensor * a, // convolution kernel [KW, KH, IC, OC] + struct ggml_tensor * b, // input data [W, H, C, N] + int s0, // stride dimension 0 + int s1, // stride dimension 1 + int p0, // padding dimension 0 + int p1, // padding dimension 1 + int d0, // dilation dimension 0 + int d1) {// dilation dimension 1 + + GGML_ASSERT(a->ne[2] == b->ne[2]); + //GGML_ASSERT(a->type == b->type); + + int64_t ne[4]; + ne[0] = ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0); + ne[1] = ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1); + ne[2] = a->ne[3]; + ne[3] = b->ne[3]; + + struct ggml_tensor * result = ggml_new_tensor(ctx, b->type, 4, ne); + + ggml_set_op_params_i32(result, 0, s0); + ggml_set_op_params_i32(result, 1, s1); + ggml_set_op_params_i32(result, 2, p0); + ggml_set_op_params_i32(result, 3, p1); + ggml_set_op_params_i32(result, 4, d0); + ggml_set_op_params_i32(result, 5, d1); + + result->op = GGML_OP_CONV_2D; + result->src[0] = a; + result->src[1] = b; + + return result; +} + // ggml_conv_transpose_2d_p0 static int64_t ggml_calc_conv_transpose_output_size(int64_t ins, int64_t ks, int s, int p) { @@ -4251,24 +4533,21 @@ struct ggml_tensor * ggml_pool_2d_back( return result; } -// ggml_upscale +// ggml_upscale / ggml_interpolate -static struct ggml_tensor * ggml_upscale_impl( +static struct ggml_tensor * ggml_interpolate_impl( struct ggml_context * ctx, struct ggml_tensor * a, - int ne0, - int ne1, - int ne2, - int ne3, - enum ggml_scale_mode mode) { - 
GGML_ASSERT(a->ne[0] <= ne0); - GGML_ASSERT(a->ne[1] <= ne1); - GGML_ASSERT(a->ne[2] <= ne2); - GGML_ASSERT(a->ne[3] <= ne3); + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode) { + GGML_ASSERT((mode & 0xFF) < GGML_SCALE_MODE_COUNT); struct ggml_tensor * result = ggml_new_tensor_4d(ctx, a->type, ne0, ne1, ne2, ne3); - ggml_set_op_params_i32(result, 0, mode); + ggml_set_op_params_i32(result, 0, (int32_t)mode); result->op = GGML_OP_UPSCALE; result->src[0] = a; @@ -4281,7 +4560,8 @@ struct ggml_tensor * ggml_upscale( struct ggml_tensor * a, int scale_factor, enum ggml_scale_mode mode) { - return ggml_upscale_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode); + GGML_ASSERT(scale_factor > 1); + return ggml_interpolate_impl(ctx, a, a->ne[0] * scale_factor, a->ne[1] * scale_factor, a->ne[2], a->ne[3], mode); } struct ggml_tensor * ggml_upscale_ext( @@ -4292,7 +4572,18 @@ struct ggml_tensor * ggml_upscale_ext( int ne2, int ne3, enum ggml_scale_mode mode) { - return ggml_upscale_impl(ctx, a, ne0, ne1, ne2, ne3, mode); + return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); +} + +struct ggml_tensor * ggml_interpolate( + struct ggml_context * ctx, + struct ggml_tensor * a, + int64_t ne0, + int64_t ne1, + int64_t ne2, + int64_t ne3, + uint32_t mode) { + return ggml_interpolate_impl(ctx, a, ne0, ne1, ne2, ne3, mode); } // ggml_pad @@ -4347,6 +4638,34 @@ struct ggml_tensor * ggml_pad_reflect_1d( return result; } +// ggml_roll + +struct ggml_tensor * ggml_roll( + struct ggml_context * ctx, + struct ggml_tensor * a, + int shift0, + int shift1, + int shift2, + int shift3) { + GGML_ASSERT(a->nb[0] == ggml_type_size(a->type)); + GGML_ASSERT(abs(shift0) < a->ne[0]); + GGML_ASSERT(abs(shift1) < a->ne[1]); + GGML_ASSERT(abs(shift2) < a->ne[2]); + GGML_ASSERT(abs(shift3) < a->ne[3]); + + struct ggml_tensor * result = ggml_dup_tensor(ctx, a); + + ggml_set_op_params_i32(result, 0, shift0); + 
ggml_set_op_params_i32(result, 1, shift1); + ggml_set_op_params_i32(result, 2, shift2); + ggml_set_op_params_i32(result, 3, shift3); + + result->op = GGML_OP_ROLL; + result->src[0] = a; + + return result; +} + // ggml_arange struct ggml_tensor * ggml_arange( @@ -4441,13 +4760,17 @@ struct ggml_tensor * ggml_flash_attn_ext( GGML_ASSERT(ggml_can_mul_mat(k, q)); // TODO: check if vT can be multiplied by (k*qT) + GGML_ASSERT(q->ne[3] == k->ne[3]); + GGML_ASSERT(q->ne[3] == v->ne[3]); + if (mask) { GGML_ASSERT(ggml_is_contiguous(mask)); - GGML_ASSERT(mask->ne[2] == 1); - GGML_ASSERT(mask->ne[3] == 1); GGML_ASSERT(mask->ne[1] >= GGML_PAD(q->ne[1], GGML_KQ_MASK_PAD) && "the Flash-Attention kernel requires the mask to be padded to GGML_KQ_MASK_PAD and at least n_queries big"); //GGML_ASSERT(ggml_can_repeat_rows(mask, qk)); + + GGML_ASSERT(q->ne[2] % mask->ne[2] == 0); + GGML_ASSERT(q->ne[3] % mask->ne[3] == 0); } if (max_bias > 0.0f) { @@ -4575,7 +4898,6 @@ struct ggml_tensor * ggml_ssm_conv( const int64_t n_s = sx->ne[2]; // TODO: maybe support other strides than 1? - // FIXME: this is always true? 
GGML_ASSERT(sx->ne[0] == d_conv - 1 + n_t); GGML_ASSERT(sx->ne[1] == d_inner); GGML_ASSERT(n_t >= 0); @@ -4598,36 +4920,49 @@ struct ggml_tensor * ggml_ssm_scan( struct ggml_tensor * dt, struct ggml_tensor * A, struct ggml_tensor * B, - struct ggml_tensor * C) { + struct ggml_tensor * C, + struct ggml_tensor * ids) { GGML_ASSERT(ggml_is_contiguous(s)); - GGML_ASSERT(ggml_is_contiguous(x)); GGML_ASSERT(ggml_is_contiguous(dt)); GGML_ASSERT(ggml_is_contiguous(A)); - GGML_ASSERT(ggml_is_matrix(A)); - GGML_ASSERT(ggml_is_3d(B)); - GGML_ASSERT(ggml_is_3d(s)); + GGML_ASSERT(x->nb[0] == ggml_type_size(x->type)); GGML_ASSERT(B->nb[0] == ggml_type_size(B->type)); GGML_ASSERT(C->nb[0] == ggml_type_size(C->type)); - GGML_ASSERT(ggml_are_same_shape(x, dt)); + GGML_ASSERT(x->nb[1] == x->ne[0]*x->nb[0]); + GGML_ASSERT(B->nb[1] == B->ne[0]*B->nb[0]); + GGML_ASSERT(C->nb[1] == C->ne[0]*C->nb[0]); GGML_ASSERT(ggml_are_same_shape(B, C)); + GGML_ASSERT(ids->type == GGML_TYPE_I32); { const int64_t d_state = s->ne[0]; - const int64_t d_inner = s->ne[1]; - const int64_t n_seq_tokens = x->ne[1]; - const int64_t n_seqs = x->ne[2]; - - GGML_ASSERT(s->ne[2] == n_seqs); - GGML_ASSERT(x->ne[0] == d_inner); - GGML_ASSERT(A->ne[0] == d_state); - GGML_ASSERT(A->ne[1] == d_inner); + const int64_t head_dim = x->ne[0]; + const int64_t n_head = x->ne[1]; + const int64_t n_seq_tokens = x->ne[2]; + const int64_t n_seqs = x->ne[3]; + + GGML_ASSERT(dt->ne[0] == n_head); + GGML_ASSERT(dt->ne[1] == n_seq_tokens); + GGML_ASSERT(dt->ne[2] == n_seqs); + GGML_ASSERT(ggml_is_3d(dt)); + GGML_ASSERT(s->ne[1] == head_dim); + GGML_ASSERT(s->ne[2] == n_head); GGML_ASSERT(B->ne[0] == d_state); - GGML_ASSERT(B->ne[1] == n_seq_tokens); - GGML_ASSERT(B->ne[2] == n_seqs); + GGML_ASSERT(B->ne[2] == n_seq_tokens); + GGML_ASSERT(B->ne[3] == n_seqs); + GGML_ASSERT(ids->ne[0] == n_seqs); + GGML_ASSERT(ggml_is_vector(ids)); + GGML_ASSERT(A->ne[1] == n_head); + GGML_ASSERT(ggml_is_matrix(A)); + + if (A->ne[0] != 1) { + // 
Mamba-1 has more granular decay factors + GGML_ASSERT(A->ne[0] == d_state); + } } // concatenated y + ssm_states - struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + ggml_nelements(s)); + struct ggml_tensor * result = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, ggml_nelements(x) + s->ne[0]*s->ne[1]*s->ne[2]*ids->ne[0]); result->op = GGML_OP_SSM_SCAN; result->src[0] = s; @@ -4636,6 +4971,7 @@ struct ggml_tensor * ggml_ssm_scan( result->src[3] = A; result->src[4] = B; result->src[5] = C; + result->src[6] = ids; return result; } @@ -5459,7 +5795,7 @@ static void ggml_compute_backward( } break; case GGML_OP_MEAN: { if (src0_needs_grads) { - ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], false)); + ggml_add1_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, 1.0f/src0->ne[0], 0.0, false)); } } break; case GGML_OP_REPEAT: { @@ -5536,7 +5872,7 @@ static void ggml_compute_backward( if (src0_needs_grads) { float s; memcpy(&s, tensor->op_params, sizeof(float)); - ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, false)); + ggml_add_or_set(ctx, cgraph, isrc0, ggml_scale_impl(ctx, grad, s, 0.0, false)); } } break; case GGML_OP_SET: { @@ -5776,13 +6112,28 @@ static void ggml_compute_backward( } GGML_ASSERT(!src1_needs_grads && "backward pass for labels not implemented"); } break; + case GGML_OP_GLU: { + switch (ggml_get_glu_op(tensor)) { + case GGML_GLU_OP_SWIGLU: { + if (src0_needs_grads) { + GGML_ASSERT(src1 && "backward pass only implemented for split swiglu"); + ggml_add_or_set(ctx, cgraph, isrc0, ggml_silu_back(ctx, ggml_mul(ctx, grad, src1), src0)); + } + if (src1_needs_grads) { + ggml_add_or_set(ctx, cgraph, isrc1, ggml_mul(ctx, ggml_silu(ctx, src0), grad)); + } + } break; + default: { + GGML_ABORT("unsupported glu op for backward pass: %s", ggml_glu_op_name(ggml_get_glu_op(tensor))); + } //break; + } + } break; case GGML_OP_NONE: { // noop } break; case GGML_OP_COUNT: default: { - 
fprintf(stderr, "%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op)); - GGML_ABORT("fatal error"); + GGML_ABORT("%s: unsupported ggml op for backward pass: %s\n", __func__, ggml_op_name(tensor->op)); } //break; } @@ -5791,19 +6142,32 @@ static void ggml_compute_backward( GGML_ASSERT(!src2_needs_grads || ggml_are_same_shape(src2, cgraph->grads[isrc2])); } -static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) { +static size_t ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * node) { // check if already visited - if (ggml_hash_insert(&cgraph->visited_hash_set, node) == GGML_HASHSET_ALREADY_EXISTS) { - return; + size_t node_hash_pos = ggml_hash_find(&cgraph->visited_hash_set, node); + GGML_ASSERT(node_hash_pos != GGML_HASHSET_FULL); + if (!ggml_bitset_get(cgraph->visited_hash_set.used, node_hash_pos)) { + // This is the first time we see this node in the current graph. + cgraph->visited_hash_set.keys[node_hash_pos] = node; + ggml_bitset_set(cgraph->visited_hash_set.used, node_hash_pos); + cgraph->use_counts[node_hash_pos] = 0; + } else { + // already visited + return node_hash_pos; } for (int i = 0; i < GGML_MAX_SRC; ++i) { const int k = (cgraph->order == GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT) ? i : (cgraph->order == GGML_CGRAPH_EVAL_ORDER_RIGHT_TO_LEFT) ? (GGML_MAX_SRC-1-i) : - /* unknown order, just fall back to using i*/ i; - if (node->src[k]) { - ggml_visit_parents(cgraph, node->src[k]); + /* unknown order, just fall back to using i */ i; + + struct ggml_tensor * src = node->src[k]; + if (src) { + size_t src_hash_pos = ggml_visit_parents(cgraph, src); + + // Update the use count for this operand. 
+ cgraph->use_counts[src_hash_pos]++; } } @@ -5827,6 +6191,8 @@ static void ggml_visit_parents(struct ggml_cgraph * cgraph, struct ggml_tensor * cgraph->nodes[cgraph->n_nodes] = node; cgraph->n_nodes++; } + + return node_hash_pos; } static void ggml_build_forward_impl(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor, bool expand) { @@ -5964,6 +6330,7 @@ static size_t ggml_graph_nbytes(size_t size, bool grads) { incr_ptr_aligned(&p, sizeof(struct ggml_cgraph), 1); incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // nodes incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // leafs + incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); // use_counts incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // hash keys if (grads) { incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); // grads @@ -5993,11 +6360,12 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz void * p = cgraph + 1; - struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); - struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); - struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); - struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; - struct ggml_tensor ** grad_accs_ptr = grads ? 
incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; + struct ggml_tensor ** nodes_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); + struct ggml_tensor ** leafs_ptr = incr_ptr_aligned(&p, size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); + int32_t * use_counts_ptr = incr_ptr_aligned(&p, hash_size * sizeof(int32_t), sizeof(int32_t)); + struct ggml_tensor ** hash_keys_ptr = incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)); + struct ggml_tensor ** grads_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; + struct ggml_tensor ** grad_accs_ptr = grads ? incr_ptr_aligned(&p, hash_size * sizeof(struct ggml_tensor *), sizeof(struct ggml_tensor *)) : NULL; ggml_bitset_t * hash_used = incr_ptr_aligned(&p, ggml_bitset_size(hash_size) * sizeof(ggml_bitset_t), sizeof(ggml_bitset_t)); @@ -6012,6 +6380,7 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz /*.grads =*/ grads_ptr, /*.grad_accs =*/ grad_accs_ptr, /*.leafs =*/ leafs_ptr, + /*.use_counts =*/ use_counts_ptr, /*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr }, /*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT, }; @@ -6038,7 +6407,8 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1) /*.grads =*/ NULL, // gradients would need visited_hash_set /*.grad_accs =*/ NULL, /*.leafs =*/ NULL, - /*.visited_hash_set =*/ { 0, NULL, NULL }, + /*.use_counts =*/ cgraph0->use_counts, + /*.visited_hash_set =*/ cgraph0->visited_hash_set, /*.order =*/ cgraph0->order, }; @@ -6065,7 +6435,8 @@ void ggml_graph_cpy(struct ggml_cgraph * src, struct ggml_cgraph * dst) { for (size_t i = 0; i < src->visited_hash_set.size; ++i) { // copy all hashset keys (tensors) that are in use if (ggml_bitset_get(src->visited_hash_set.used, i)) { - 
ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]); + size_t new_hash_pos = ggml_hash_insert(&dst->visited_hash_set, src->visited_hash_set.keys[i]); + dst->use_counts[new_hash_pos] = src->use_counts[i]; } } diff --git a/ggml/src/gguf.cpp b/ggml/src/gguf.cpp index a0a318a29f5b9..53504399c57f4 100644 --- a/ggml/src/gguf.cpp +++ b/ggml/src/gguf.cpp @@ -335,7 +335,11 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par for (uint32_t i = 0; i < magic.size(); i++) { if (magic[i] != GGUF_MAGIC[i]) { - GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, magic[0], magic[1], magic[2], magic[3]); + char c0 = isprint(magic[0]) ? magic[0] : '?'; + char c1 = isprint(magic[1]) ? magic[1] : '?'; + char c2 = isprint(magic[2]) ? magic[2] : '?'; + char c3 = isprint(magic[3]) ? magic[3] : '?'; + GGML_LOG_ERROR("%s: invalid magic characters: '%c%c%c%c', expected 'GGUF'\n", __func__, c0, c1, c2, c3); gguf_free(ctx); return nullptr; } @@ -627,7 +631,14 @@ struct gguf_context * gguf_init_from_file_impl(FILE * file, struct gguf_init_par gguf_free(ctx); return nullptr; } - ctx->size += GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment); + size_t padded_size = GGML_PAD(ggml_nbytes(&ti.t), ctx->alignment); + if (SIZE_MAX - ctx->size < padded_size) { + GGML_LOG_ERROR("%s: tensor '%s' size overflow, cannot accumulate size %zu + %zu\n", + __func__, ti.t.name, ctx->size, padded_size); + gguf_free(ctx); + return nullptr; + } + ctx->size += padded_size; } } diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 3ee2b2064e1b4..a8f5947ac33bf 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -118,6 +118,10 @@ class LLM: EMBEDDING_SCALE = "{arch}.embedding_scale" TOKEN_SHIFT_COUNT = "{arch}.token_shift_count" INTERLEAVE_MOE_LAYER_STEP = "{arch}.interleave_moe_layer_step" + ACTIVATION_SPARSITY_SCALE = "{arch}.activation_sparsity_scale" + ALTUP_ACTIVE_IDX = "{arch}.altup.active_idx" 
+ ALTUP_NUM_INPUTS = "{arch}.altup.num_inputs" + EMBD_LENGTH_PER_LAYER_INP = "{arch}.embedding_length_per_layer_input" class Attention: HEAD_COUNT = "{arch}.attention.head_count" @@ -142,6 +146,8 @@ class Attention: SCALE = "{arch}.attention.scale" KEY_LENGTH_MLA = "{arch}.attention.key_length_mla" VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla" + SHARED_KV_LAYERS = "{arch}.attention.shared_kv_layers" + SLIDING_WINDOW_PATTERN = "{arch}.attention.sliding_window_pattern" class Rope: DIMENSION_COUNT = "{arch}.rope.dimension_count" @@ -164,6 +170,7 @@ class SSM: INNER_SIZE = "{arch}.ssm.inner_size" STATE_SIZE = "{arch}.ssm.state_size" TIME_STEP_RANK = "{arch}.ssm.time_step_rank" + GROUP_COUNT = "{arch}.ssm.group_count" DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms" class WKV: @@ -180,6 +187,9 @@ class ConvNext: class Classifier: OUTPUT_LABELS = "{arch}.classifier.output_labels" + class ShortConv: + L_CACHE = "{arch}.shortconv.l_cache" + class Tokenizer: MODEL = "tokenizer.ggml.model" PRE = "tokenizer.ggml.pre" @@ -198,6 +208,7 @@ class Tokenizer: MASK_ID = "tokenizer.ggml.mask_token_id" ADD_BOS = "tokenizer.ggml.add_bos_token" ADD_EOS = "tokenizer.ggml.add_eos_token" + ADD_SEP = "tokenizer.ggml.add_sep_token" ADD_PREFIX = "tokenizer.ggml.add_space_prefix" REMOVE_EXTRA_WS = "tokenizer.ggml.remove_extra_whitespaces" PRECOMPILED_CHARSMAP = "tokenizer.ggml.precompiled_charsmap" @@ -280,6 +291,7 @@ class MODEL_ARCH(IntEnum): LLAMA4 = auto() DECI = auto() FALCON = auto() + FALCON_H1 = auto() BAICHUAN = auto() GROK = auto() GPT2 = auto() @@ -291,6 +303,7 @@ class MODEL_ARCH(IntEnum): BERT = auto() NOMIC_BERT = auto() NOMIC_BERT_MOE = auto() + NEO_BERT = auto() JINA_BERT_V2 = auto() BLOOM = auto() STABLELM = auto() @@ -304,6 +317,7 @@ class MODEL_ARCH(IntEnum): PHI3 = auto() PHIMOE = auto() PLAMO = auto() + PLAMO2 = auto() CODESHELL = auto() ORION = auto() INTERNLM2 = auto() @@ -312,12 +326,15 @@ class MODEL_ARCH(IntEnum): GEMMA = auto() GEMMA2 = auto() GEMMA3 = auto() + GEMMA3N 
= auto() STARCODER2 = auto() RWKV6 = auto() RWKV6QWEN2 = auto() RWKV7 = auto() ARWKV7 = auto() MAMBA = auto() + MAMBA2 = auto() + JAMBA = auto() XVERSE = auto() COMMAND_R = auto() COHERE2 = auto() @@ -339,10 +356,19 @@ class MODEL_ARCH(IntEnum): EXAONE = auto() GRANITE = auto() GRANITE_MOE = auto() + GRANITE_HYBRID = auto() CHAMELEON = auto() WAVTOKENIZER_DEC = auto() PLM = auto() BAILINGMOE = auto() + DOTS1 = auto() + ARCEE = auto() + ERNIE4_5 = auto() + ERNIE4_5_MOE = auto() + HUNYUAN_MOE = auto() + SMOLLM3 = auto() + LFM2 = auto() + DREAM = auto() class VISION_PROJECTOR_TYPE(IntEnum): @@ -395,12 +421,32 @@ class MODEL_TENSOR(IntEnum): ATTN_Q_NORM = auto() ATTN_K_NORM = auto() LAYER_OUT_NORM = auto() + PER_LAYER_TOKEN_EMBD = auto() # gemma3n + PER_LAYER_MODEL_PROJ = auto() # gemma3n + PER_LAYER_INP_GATE = auto() # gemma3n + PER_LAYER_PROJ = auto() # gemma3n + PER_LAYER_PROJ_NORM = auto() # gemma3n + PER_LAYER_POST_NORM = auto() # gemma3n + ALTUP_PROJ = auto() # gemma3n + ALTUP_UNEMBD_PROJ = auto() # gemma3n + ALTUP_CORRECT_COEF = auto() # gemma3n + ALTUP_CORRECT_SCALE = auto() # gemma3n + ALTUP_PREDICT_COEF = auto() # gemma3n + ALTUP_ROUTER = auto() # gemma3n + ALTUP_ROUTER_NORM = auto() # gemma3n + LAUREL_L = auto() # gemma3n + LAUREL_R = auto() # gemma3n + LAUREL_POST_NORM = auto() # gemma3n SSM_IN = auto() SSM_CONV1D = auto() SSM_X = auto() SSM_DT = auto() + SSM_DT_NORM = auto() SSM_A = auto() + SSM_B_NORM = auto() + SSM_C_NORM = auto() SSM_D = auto() + SSM_NORM = auto() SSM_OUT = auto() TIME_MIX_W0 = auto() TIME_MIX_W1 = auto() @@ -494,6 +540,9 @@ class MODEL_TENSOR(IntEnum): POSNET_ATTN_K = auto() POSNET_ATTN_V = auto() POSNET_ATTN_OUT = auto() + SHORTCONV_CONV = auto() + SHORTCONV_INPROJ = auto() + SHORTCONV_OUTPROJ = auto() # vision V_MMPROJ = auto() V_MMPROJ_FC = auto() @@ -571,6 +620,7 @@ class MODEL_TENSOR(IntEnum): MODEL_ARCH.BERT: "bert", MODEL_ARCH.NOMIC_BERT: "nomic-bert", MODEL_ARCH.NOMIC_BERT_MOE: "nomic-bert-moe", + MODEL_ARCH.NEO_BERT: 
"neo-bert", MODEL_ARCH.JINA_BERT_V2: "jina-bert-v2", MODEL_ARCH.BLOOM: "bloom", MODEL_ARCH.STABLELM: "stablelm", @@ -584,6 +634,7 @@ class MODEL_TENSOR(IntEnum): MODEL_ARCH.PHI3: "phi3", MODEL_ARCH.PHIMOE: "phimoe", MODEL_ARCH.PLAMO: "plamo", + MODEL_ARCH.PLAMO2: "plamo2", MODEL_ARCH.CODESHELL: "codeshell", MODEL_ARCH.ORION: "orion", MODEL_ARCH.INTERNLM2: "internlm2", @@ -592,12 +643,15 @@ class MODEL_TENSOR(IntEnum): MODEL_ARCH.GEMMA: "gemma", MODEL_ARCH.GEMMA2: "gemma2", MODEL_ARCH.GEMMA3: "gemma3", + MODEL_ARCH.GEMMA3N: "gemma3n", MODEL_ARCH.STARCODER2: "starcoder2", MODEL_ARCH.RWKV6: "rwkv6", MODEL_ARCH.RWKV6QWEN2: "rwkv6qwen2", MODEL_ARCH.RWKV7: "rwkv7", MODEL_ARCH.ARWKV7: "arwkv7", MODEL_ARCH.MAMBA: "mamba", + MODEL_ARCH.MAMBA2: "mamba2", + MODEL_ARCH.JAMBA: "jamba", MODEL_ARCH.XVERSE: "xverse", MODEL_ARCH.COMMAND_R: "command-r", MODEL_ARCH.COHERE2: "cohere2", @@ -619,10 +673,20 @@ class MODEL_TENSOR(IntEnum): MODEL_ARCH.EXAONE: "exaone", MODEL_ARCH.GRANITE: "granite", MODEL_ARCH.GRANITE_MOE: "granitemoe", + MODEL_ARCH.GRANITE_HYBRID: "granitehybrid", MODEL_ARCH.CHAMELEON: "chameleon", MODEL_ARCH.WAVTOKENIZER_DEC: "wavtokenizer-dec", MODEL_ARCH.PLM: "plm", MODEL_ARCH.BAILINGMOE: "bailingmoe", + MODEL_ARCH.DOTS1: "dots1", + MODEL_ARCH.ARCEE: "arcee", + MODEL_ARCH.ERNIE4_5: "ernie4_5", + MODEL_ARCH.ERNIE4_5_MOE: "ernie4_5-moe", + MODEL_ARCH.FALCON_H1: "falcon-h1", + MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe", + MODEL_ARCH.SMOLLM3: "smollm3", + MODEL_ARCH.LFM2: "lfm2", + MODEL_ARCH.DREAM: "dream", } VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = { @@ -675,12 +739,32 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps", MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b", MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm", + MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: "per_layer_token_embd", # gemma3n + MODEL_TENSOR.PER_LAYER_MODEL_PROJ: "per_layer_model_proj", # gemma3n + MODEL_TENSOR.PER_LAYER_PROJ_NORM: 
"per_layer_proj_norm", # gemma3n + MODEL_TENSOR.ALTUP_UNEMBD_PROJ: "altup_unembd_proj", # gemma3n + MODEL_TENSOR.ALTUP_PROJ: "altup_proj", # gemma3n + MODEL_TENSOR.PER_LAYER_INP_GATE: "blk.{bid}.inp_gate", # gemma3n + MODEL_TENSOR.PER_LAYER_PROJ: "blk.{bid}.proj", # gemma3n + MODEL_TENSOR.PER_LAYER_POST_NORM: "blk.{bid}.post_norm", # gemma3n + MODEL_TENSOR.ALTUP_CORRECT_COEF: "blk.{bid}.altup_correct_coef", # gemma3n + MODEL_TENSOR.ALTUP_CORRECT_SCALE: "blk.{bid}.altup_correct_scale", # gemma3n + MODEL_TENSOR.ALTUP_PREDICT_COEF: "blk.{bid}.altup_predict_coef", # gemma3n + MODEL_TENSOR.ALTUP_ROUTER: "blk.{bid}.altup_router", # gemma3n + MODEL_TENSOR.ALTUP_ROUTER_NORM: "blk.{bid}.altup_router_norm", # gemma3n + MODEL_TENSOR.LAUREL_L: "blk.{bid}.laurel_l", # gemma3n + MODEL_TENSOR.LAUREL_R: "blk.{bid}.laurel_r", # gemma3n + MODEL_TENSOR.LAUREL_POST_NORM: "blk.{bid}.laurel_post_norm", # gemma3n MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in", MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d", MODEL_TENSOR.SSM_X: "blk.{bid}.ssm_x", MODEL_TENSOR.SSM_DT: "blk.{bid}.ssm_dt", + MODEL_TENSOR.SSM_DT_NORM: "blk.{bid}.ssm_dt_norm", MODEL_TENSOR.SSM_A: "blk.{bid}.ssm_a", + MODEL_TENSOR.SSM_B_NORM: "blk.{bid}.ssm_b_norm", + MODEL_TENSOR.SSM_C_NORM: "blk.{bid}.ssm_c_norm", MODEL_TENSOR.SSM_D: "blk.{bid}.ssm_d", + MODEL_TENSOR.SSM_NORM: "blk.{bid}.ssm_norm", MODEL_TENSOR.SSM_OUT: "blk.{bid}.ssm_out", MODEL_TENSOR.TIME_MIX_W0: "blk.{bid}.time_mix_w0", MODEL_TENSOR.TIME_MIX_W1: "blk.{bid}.time_mix_w1", @@ -774,6 +858,9 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.POSNET_ATTN_K: "posnet.{bid}.attn_k", MODEL_TENSOR.POSNET_ATTN_V: "posnet.{bid}.attn_v", MODEL_TENSOR.POSNET_ATTN_OUT: "posnet.{bid}.attn_output", + MODEL_TENSOR.SHORTCONV_CONV: "blk.{bid}.shortconv.conv", + MODEL_TENSOR.SHORTCONV_INPROJ: "blk.{bid}.shortconv.in_proj", + MODEL_TENSOR.SHORTCONV_OUTPROJ: "blk.{bid}.shortconv.out_proj", # vision MODEL_TENSOR.V_MMPROJ: "mm.{bid}", MODEL_TENSOR.V_MMPROJ_FC: "mm.model.fc", @@ -1077,6 
+1164,18 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_UP_EXP, MODEL_TENSOR.LAYER_OUT_NORM, ], + MODEL_ARCH.NEO_BERT: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_QKV, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.ENC_OUTPUT_NORM, + MODEL_TENSOR.CLS, + MODEL_TENSOR.CLS_OUT, + ], MODEL_ARCH.JINA_BERT_V2: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.TOKEN_EMBD_NORM, @@ -1194,6 +1293,21 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, ], + MODEL_ARCH.DREAM: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], MODEL_ARCH.QWEN2VL: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, @@ -1276,6 +1390,36 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_DOWN, MODEL_TENSOR.FFN_UP, ], + MODEL_ARCH.PLAMO2: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_QKV, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.ATTN_Q_NORM, + MODEL_TENSOR.ATTN_K_NORM, + MODEL_TENSOR.ATTN_POST_NORM, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_POST_NORM, + MODEL_TENSOR.SSM_IN, + MODEL_TENSOR.SSM_CONV1D, + MODEL_TENSOR.SSM_X, + MODEL_TENSOR.SSM_DT, + MODEL_TENSOR.SSM_A, + MODEL_TENSOR.SSM_D, + MODEL_TENSOR.SSM_OUT, + MODEL_TENSOR.SSM_DT_NORM, + MODEL_TENSOR.SSM_B_NORM, + MODEL_TENSOR.SSM_C_NORM, + ], MODEL_ARCH.GPT2: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.POS_EMBD, @@ -1467,6 +1611,41 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_PRE_NORM, 
MODEL_TENSOR.FFN_POST_NORM, ], + MODEL_ARCH.GEMMA3N: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_Q_NORM, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_K_NORM, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_POST_NORM, + MODEL_TENSOR.FFN_PRE_NORM, + MODEL_TENSOR.FFN_POST_NORM, + # altup / laurel + MODEL_TENSOR.PER_LAYER_TOKEN_EMBD, + MODEL_TENSOR.PER_LAYER_MODEL_PROJ, + MODEL_TENSOR.PER_LAYER_INP_GATE, + MODEL_TENSOR.PER_LAYER_PROJ, + MODEL_TENSOR.PER_LAYER_PROJ_NORM, + MODEL_TENSOR.PER_LAYER_POST_NORM, + MODEL_TENSOR.ALTUP_PROJ, + MODEL_TENSOR.ALTUP_UNEMBD_PROJ, + MODEL_TENSOR.ALTUP_CORRECT_COEF, + MODEL_TENSOR.ALTUP_CORRECT_SCALE, + MODEL_TENSOR.ALTUP_PREDICT_COEF, + MODEL_TENSOR.ALTUP_ROUTER, + MODEL_TENSOR.ALTUP_ROUTER_NORM, + MODEL_TENSOR.LAUREL_L, + MODEL_TENSOR.LAUREL_R, + MODEL_TENSOR.LAUREL_POST_NORM, + ], MODEL_ARCH.STARCODER2: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, @@ -1618,6 +1797,47 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.SSM_D, MODEL_TENSOR.SSM_OUT, ], + MODEL_ARCH.MAMBA2: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.SSM_IN, + MODEL_TENSOR.SSM_CONV1D, + MODEL_TENSOR.SSM_DT, + MODEL_TENSOR.SSM_A, + MODEL_TENSOR.SSM_D, + MODEL_TENSOR.SSM_NORM, + MODEL_TENSOR.SSM_OUT, + ], + MODEL_ARCH.JAMBA: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.SSM_IN, + MODEL_TENSOR.SSM_CONV1D, + MODEL_TENSOR.SSM_X, + MODEL_TENSOR.SSM_DT, + MODEL_TENSOR.SSM_DT_NORM, + MODEL_TENSOR.SSM_A, + MODEL_TENSOR.SSM_B_NORM, + MODEL_TENSOR.SSM_C_NORM, + MODEL_TENSOR.SSM_D, + MODEL_TENSOR.SSM_OUT, + MODEL_TENSOR.FFN_GATE_INP, + 
MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_GATE_EXP, + MODEL_TENSOR.FFN_DOWN_EXP, + MODEL_TENSOR.FFN_UP_EXP, + ], MODEL_ARCH.XVERSE: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, @@ -1804,6 +2024,28 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_UP_SHEXP, MODEL_TENSOR.FFN_EXP_PROBS_B, ], + MODEL_ARCH.ERNIE4_5_MOE: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_GATE_INP, + MODEL_TENSOR.FFN_GATE_EXP, + MODEL_TENSOR.FFN_DOWN_EXP, + MODEL_TENSOR.FFN_UP_EXP, + MODEL_TENSOR.FFN_GATE_SHEXP, + MODEL_TENSOR.FFN_DOWN_SHEXP, + MODEL_TENSOR.FFN_UP_SHEXP, + MODEL_TENSOR.FFN_EXP_PROBS_B, + ], MODEL_ARCH.PLM: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT, @@ -1987,6 +2229,36 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_UP_SHEXP, MODEL_TENSOR.FFN_DOWN_SHEXP, ], + MODEL_ARCH.GRANITE_HYBRID: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.SSM_IN, + MODEL_TENSOR.SSM_CONV1D, + MODEL_TENSOR.SSM_DT, + MODEL_TENSOR.SSM_A, + MODEL_TENSOR.SSM_D, + MODEL_TENSOR.SSM_NORM, + MODEL_TENSOR.SSM_OUT, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_NORM, + # MoE + MODEL_TENSOR.FFN_GATE_INP, + MODEL_TENSOR.FFN_GATE_EXP, + MODEL_TENSOR.FFN_DOWN_EXP, + MODEL_TENSOR.FFN_UP_EXP, + MODEL_TENSOR.FFN_GATE_SHEXP, + MODEL_TENSOR.FFN_UP_SHEXP, + MODEL_TENSOR.FFN_DOWN_SHEXP, + # Dense + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], MODEL_ARCH.CHAMELEON: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.OUTPUT_NORM, @@ -2044,6 +2316,148 @@ class MODEL_TENSOR(IntEnum): MODEL_TENSOR.FFN_DOWN_SHEXP, 
MODEL_TENSOR.FFN_UP_SHEXP, ], + MODEL_ARCH.DOTS1: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_Q_NORM, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_K_NORM, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_EXP_PROBS_B, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_GATE_EXP, + MODEL_TENSOR.FFN_GATE_INP, + MODEL_TENSOR.FFN_GATE_SHEXP, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_DOWN_EXP, + MODEL_TENSOR.FFN_DOWN_SHEXP, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_UP_EXP, + MODEL_TENSOR.FFN_UP_SHEXP, + ], + MODEL_ARCH.ARCEE: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], + MODEL_ARCH.ERNIE4_5: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], + MODEL_ARCH.FALCON_H1: [ + # Token embedding + MODEL_TENSOR.TOKEN_EMBD, + + # Input layernorm + MODEL_TENSOR.ATTN_NORM, + + # Attention components + MODEL_TENSOR.ATTN_Q, # Query projection + MODEL_TENSOR.ATTN_K, # Key projection + MODEL_TENSOR.ATTN_V, # Value projection + MODEL_TENSOR.ATTN_OUT, # Output projection + + # SSM components (Mamba2 specific) + MODEL_TENSOR.SSM_IN, # Input projection for SSM + MODEL_TENSOR.SSM_CONV1D, # Convolution layer + MODEL_TENSOR.SSM_DT, # Delta time projection + MODEL_TENSOR.SSM_A, # A parameter (log form) + MODEL_TENSOR.SSM_D, # D parameter + MODEL_TENSOR.SSM_NORM, # Normalization in SSM + MODEL_TENSOR.SSM_OUT, # Output 
projection + + # Pre-feedforward layernorm + MODEL_TENSOR.FFN_PRE_NORM, + + # Feed-forward network components + MODEL_TENSOR.FFN_GATE, # Gate projection (SwiGLU) + MODEL_TENSOR.FFN_DOWN, # Down projection + MODEL_TENSOR.FFN_UP, # Up projection + + # Post-feedforward layernorm + MODEL_TENSOR.OUTPUT_NORM, # Final layer norm + MODEL_TENSOR.OUTPUT, # Output projection (lm_head) + ], + MODEL_ARCH.HUNYUAN_MOE: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_Q_NORM, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_K_NORM, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.FFN_GATE_INP, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE_EXP, + MODEL_TENSOR.FFN_DOWN_EXP, + MODEL_TENSOR.FFN_UP_EXP, + MODEL_TENSOR.FFN_GATE_SHEXP, + MODEL_TENSOR.FFN_DOWN_SHEXP, + MODEL_TENSOR.FFN_UP_SHEXP, + ], + MODEL_ARCH.SMOLLM3: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.OUTPUT_NORM, + MODEL_TENSOR.OUTPUT, + MODEL_TENSOR.ROPE_FREQS, + MODEL_TENSOR.ATTN_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + MODEL_TENSOR.ATTN_ROT_EMBD, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + ], + MODEL_ARCH.LFM2: [ + MODEL_TENSOR.TOKEN_EMBD, + MODEL_TENSOR.TOKEN_EMBD_NORM, + MODEL_TENSOR.SHORTCONV_CONV, + MODEL_TENSOR.SHORTCONV_INPROJ, + MODEL_TENSOR.SHORTCONV_OUTPROJ, + MODEL_TENSOR.FFN_GATE, + MODEL_TENSOR.FFN_DOWN, + MODEL_TENSOR.FFN_UP, + MODEL_TENSOR.FFN_NORM, + MODEL_TENSOR.ATTN_NORM, # operator_norm + MODEL_TENSOR.ATTN_Q_NORM, + MODEL_TENSOR.ATTN_K_NORM, + MODEL_TENSOR.ATTN_Q, + MODEL_TENSOR.ATTN_K, + MODEL_TENSOR.ATTN_V, + MODEL_TENSOR.ATTN_OUT, + ], # TODO } @@ -2348,6 +2762,7 @@ class VisionProjectorType: KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK +KEY_SSM_GROUP_COUNT = 
Keys.SSM.GROUP_COUNT KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS # tokenization diff --git a/gguf-py/gguf/gguf_writer.py b/gguf-py/gguf/gguf_writer.py index adc673e38ff07..4f23f9b024619 100644 --- a/gguf-py/gguf/gguf_writer.py +++ b/gguf-py/gguf/gguf_writer.py @@ -271,7 +271,7 @@ def write_ti_data_to_file(self) -> None: def add_key_value(self, key: str, val: Any, vtype: GGUFValueType, sub_type: GGUFValueType | None = None) -> None: if any(key in kv_data for kv_data in self.kv_data): - raise ValueError(f'Duplicated key name {key!r}') + logger.warning(f'Duplicated key name {key!r}, overwriting it with new value {val!r} of type {vtype.name}') self.kv_data[0][key] = GGUFValue(value=val, type=vtype, sub_type=sub_type) @@ -648,6 +648,9 @@ def add_convnext_embedding_length(self, length: int) -> None: def add_convnext_block_count(self, length: int) -> None: self.add_uint32(Keys.ConvNext.BLOCK_COUNT.format(arch=self.arch), length) + def add_shortconv_l_cache(self, length: int) -> None: + self.add_uint32(Keys.ShortConv.L_CACHE.format(arch=self.arch), length) + def add_block_count(self, length: int) -> None: self.add_uint32(Keys.LLM.BLOCK_COUNT.format(arch=self.arch), length) @@ -672,6 +675,18 @@ def add_parallel_residual(self, use: bool) -> None: def add_decoder_start_token_id(self, id: int) -> None: self.add_uint32(Keys.LLM.DECODER_START_TOKEN_ID.format(arch=self.arch), id) + def add_embedding_length_per_layer_input(self, value: int) -> None: + self.add_uint32(Keys.LLM.EMBD_LENGTH_PER_LAYER_INP.format(arch=self.arch), value) + + def add_altup_active_idx(self, val: int) -> None: + self.add_uint32(Keys.LLM.ALTUP_ACTIVE_IDX.format(arch=self.arch), val) + + def add_altup_num_inputs(self, val: int) -> None: + self.add_uint32(Keys.LLM.ALTUP_NUM_INPUTS.format(arch=self.arch), val) + + def add_activation_sparsity_scale(self, values: Sequence[float]) -> None: + self.add_array(Keys.LLM.ACTIVATION_SPARSITY_SCALE.format(arch=self.arch), values) + def add_head_count(self, count: int | 
Sequence[int]) -> None: if isinstance(count, int): self.add_uint32(Keys.Attention.HEAD_COUNT.format(arch=self.arch), count) @@ -702,6 +717,12 @@ def add_max_alibi_bias(self, bias: float) -> None: def add_clamp_kqv(self, value: float) -> None: self.add_float32(Keys.Attention.CLAMP_KQV.format(arch=self.arch), value) + def add_shared_kv_layers(self, value: int) -> None: + self.add_uint32(Keys.Attention.SHARED_KV_LAYERS.format(arch=self.arch), value) + + def add_sliding_window_pattern(self, value: Sequence[bool]) -> None: + self.add_array(Keys.Attention.SLIDING_WINDOW_PATTERN.format(arch=self.arch), value) + def add_logit_scale(self, value: float) -> None: self.add_float32(Keys.LLM.LOGIT_SCALE.format(arch=self.arch), value) @@ -843,6 +864,9 @@ def add_ssm_state_size(self, value: int) -> None: def add_ssm_time_step_rank(self, value: int) -> None: self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value) + def add_ssm_group_count(self, value: int) -> None: + self.add_uint32(Keys.SSM.GROUP_COUNT.format(arch=self.arch), value) + def add_ssm_dt_b_c_rms(self, value: bool) -> None: self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value) @@ -891,6 +915,9 @@ def add_add_bos_token(self, value: bool) -> None: def add_add_eos_token(self, value: bool) -> None: self.add_bool(Keys.Tokenizer.ADD_EOS, value) + def add_add_sep_token(self, value: bool) -> None: + self.add_bool(Keys.Tokenizer.ADD_SEP, value) + def add_add_space_prefix(self, value: bool) -> None: self.add_bool(Keys.Tokenizer.ADD_PREFIX, value) diff --git a/gguf-py/gguf/scripts/gguf_dump.py b/gguf-py/gguf/scripts/gguf_dump.py index e282892d645c7..8177dff386c7e 100755 --- a/gguf-py/gguf/scripts/gguf_dump.py +++ b/gguf-py/gguf/scripts/gguf_dump.py @@ -234,6 +234,8 @@ def dump_markdown_metadata(reader: GGUFReader, args: argparse.Namespace) -> None markdown_content += '## Key Value Metadata Store\n\n' markdown_content += f'There are {len(reader.fields)} key-value pairs in this file\n' markdown_content += 
'\n' + total_model_bytes = 0 + total_model_elements = 0 kv_dump_table: list[dict[str, str | int]] = [] for n, field in enumerate(reader.fields.values(), 1): @@ -377,6 +379,8 @@ def escape_markdown_inline_code(value_string): tensors = tensor_groups[group] group_elements = sum(tensor.n_elements for tensor in tensors) group_percentage = group_elements / total_elements * 100 + total_group_bytes = 0 + total_group_elements = 0 markdown_content += f"### {translate_tensor_name(group)} Tensor Group : {element_count_rounded_notation(group_elements)} Elements\n\n" # Precalculate column sizing for visual consistency @@ -397,7 +401,13 @@ def escape_markdown_inline_code(value_string): element_count_est = f"({element_count_rounded_notation(tensor.n_elements):>{prettify_element_est_count_size}})" element_count_string = f"{element_count_est} {tensor.n_elements:>{prettify_element_count_size}}" type_name_string = f"{tensor.tensor_type.name}" - tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string}) + if tensor.n_elements > 0: + bpw = (tensor.n_bytes * 8) / tensor.n_elements + else: + bpw = float('nan') + tensor_dump_table.append({"t_id":tensor_name_to_key[tensor.name], "layer_name":tensor.name, "human_layer_name":human_friendly_name, "element_count":element_count_string, "pretty_dimension":pretty_dimension, "tensor_type":type_name_string, "bpw": f"{bpw:.4f}"}) + total_group_bytes += tensor.n_bytes + total_group_elements += tensor.n_elements tensor_dump_table_header_map = [ {'key_name':'t_id', 'header_name':'T_ID', 'align':'right'}, @@ -406,6 +416,7 @@ def escape_markdown_inline_code(value_string): {'key_name':'element_count', 'header_name':'Elements', 'align':'left'}, {'key_name':'pretty_dimension', 'header_name':'Shape', 'align':'left'}, {'key_name':'tensor_type', 'header_name':'Type', 'align':'left'}, 
+ {'key_name':'bpw', 'header_name':'BPW', 'align':'right'}, ] markdown_content += markdown_table_with_alignment_support(tensor_dump_table_header_map, tensor_dump_table) @@ -413,8 +424,20 @@ def escape_markdown_inline_code(value_string): markdown_content += "\n" markdown_content += f"- Total elements in {group}: ({element_count_rounded_notation(group_elements):>4}) {group_elements}\n" markdown_content += f"- Percentage of total elements: {group_percentage:.2f}%\n" + if total_group_elements > 0: + total_group_bpw = (total_group_bytes * 8) / total_group_elements + markdown_content += f"- Bits per Weight (BPW) for {group}: {total_group_bpw:.4f} bits\n" + else: + markdown_content += f"- Bits per Weight (BPW) for {group}: undefined (no elements)\n" markdown_content += "\n\n" + total_model_bytes += total_group_bytes + total_model_elements += total_group_elements + if total_model_elements > 0: + total_model_bpw = (total_model_bytes * 8) / total_model_elements + markdown_content += f"Total BPW for {os.path.basename(args.model)}: {total_model_bpw:.4f} bits" + else: + markdown_content += f"Total BPW for {os.path.basename(args.model)}: undefined (no elements)" print(markdown_content) # noqa: NP100 diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 439fc1afeeb0c..7fbda422f0fe9 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -13,7 +13,7 @@ class TensorNameMap: "transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone "transformer.word_embeddings", # falcon "word_embeddings", # bloom - "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 + "model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414 plamo2 granite-hybrid "tok_embeddings", # llama-pth "embeddings.word_embeddings", # bert nomic-bert "language_model.embedding.word_embeddings", # persimmon @@ -31,6 +31,7 @@ class TensorNameMap: "model.embeddings", # rwkv7 "model.word_embeddings", # bailingmoe 
"language_model.model.embed_tokens", # llama4 + "encoder", # neobert ), # Token type embeddings @@ -49,6 +50,7 @@ class TensorNameMap: "model.pre_ln", # rwkv7 "model.layers.0.pre_norm", # rwkv7 "backbone.norm", # wavtokenizer + "model.embedding_norm", # lfm2 ), # Position embeddings @@ -61,7 +63,7 @@ class TensorNameMap: # Output MODEL_TENSOR.OUTPUT: ( "embed_out", # gptneox - "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe + "lm_head", # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe plamo2 "output", # llama-pth bloom internlm2 "word_embeddings_for_head", # persimmon "lm_head.linear", # phi2 @@ -75,7 +77,7 @@ class TensorNameMap: MODEL_TENSOR.OUTPUT_NORM: ( "gpt_neox.final_layer_norm", # gptneox "transformer.ln_f", # gpt2 gpt-j falcon jais exaone - "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe + "model.norm", # llama-hf baichuan internlm2 olmoe olmo2 phimoe plamo2 "norm", # llama-pth "transformer.norm_f", # mpt dbrx "ln_f", # refact bloom qwen gpt2 @@ -117,13 +119,14 @@ class TensorNameMap: "transformer.h.{bid}.input_layernorm", # falcon7b "h.{bid}.input_layernorm", # bloom "transformer.h.{bid}.ln_mlp", # falcon40b - "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe + "model.layers.{bid}.input_layernorm", # llama-hf nemotron olmoe phimoe granite-hybrid "layers.{bid}.attention_norm", # llama-pth "language_model.encoder.layers.{bid}.input_layernorm", # persimmon "model.layers.{bid}.ln1", # yi "h.{bid}.ln_1", # gpt2 "transformer.h.{bid}.ln", # phi2 "model.layers.layers.{bid}.norm", # plamo + "model.layers.layers.{bid}.pre_mixer_norm", # plamo2 "model.layers.{bid}.attention_norm", # internlm2 "model.layers.{bid}.norm", # mamba-qbert "backbone.layers.{bid}.norm", # mamba @@ -134,6 +137,8 @@ class TensorNameMap: "rwkv.blocks.{bid}.ln1", # rwkv6 "model.layers.{bid}.ln1", # rwkv7 "model.layers.{bid}.input_layernorm", # llama4 + 
"transformer_encoder.{bid}.attention_norm", # neobert + "model.layers.{bid}.operator_norm", # lfm2 ), # Attention norm 2 @@ -159,8 +164,10 @@ class TensorNameMap: "encoder.layers.{bid}.attn.Wqkv", # nomic-bert "encoder.layers.{bid}.mixer.Wqkv", # jina "model.layers.{bid}.self_attn.qkv_proj", # phi3 + "model.layers.layers.{bid}.mixer.qkv_proj", # plamo2 "encoder.layers.{bid}.self_attention.query_key_value", # chatglm "transformer.layers.{bid}.attn.qkv_proj", # openelm + "transformer_encoder.{bid}.qkv", # neobert ), # Attention query @@ -217,6 +224,7 @@ class TensorNameMap: "transformer.h.{bid}.self_attention.dense", # falcon "h.{bid}.self_attention.dense", # bloom "model.layers.{bid}.self_attn.o_proj", # llama-hf nemotron olmoe olmo2 phimoe + "model.layers.{bid}.self_attn.out_proj", # lfm2 "model.layers.{bid}.self_attn.linear_attn", # deci "layers.{bid}.attention.wo", # llama-pth "encoder.layer.{bid}.attention.output.dense", # bert @@ -227,6 +235,7 @@ class TensorNameMap: "h.{bid}.attn.c_proj", # gpt2 "transformer.h.{bid}.mixer.out_proj", # phi2 "model.layers.layers.{bid}.self_attn.o_proj", # plamo + "model.layers.layers.{bid}.mixer.o_proj", # plamo2 "model.layers.{bid}.attention.wo", # internlm2 "encoder.layers.{bid}.attn.out_proj", # nomic-bert "encoder.layers.{bid}.mixer.out_proj", # jina @@ -236,6 +245,7 @@ class TensorNameMap: "transformer.layers.{bid}.attn.out_proj", # openelm "transformer.h.{bid}.attn.attention.out_proj", # exaone "model.layers.{bid}.self_attn.o_proj", # llama4 + "transformer_encoder.{bid}.wo", # neobert ), # Attention output norm @@ -248,8 +258,9 @@ class TensorNameMap: ), MODEL_TENSOR.ATTN_POST_NORM: ( - "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge - "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414 + "model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge + "model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414 + "model.layers.layers.{bid}.post_mixer_norm.weight", # plamo2 ), # Rotary 
embeddings @@ -275,18 +286,25 @@ class TensorNameMap: "transformer.decoder_layer.{bid}.rms_norm_2", # Grok "encoder.layers.{bid}.post_attention_layernorm", # chatglm "transformer.layers.{bid}.ffn_norm", # openelm + "model.layers.{bid}.pre_ff_layernorm", # jamba granite-hybrid + "model.layers.{bid}.pre_moe_layernorm", # mini-jamba "model.layers.{bid}.post_attention_layernorm", # llama4 + "transformer_encoder.{bid}.ffn_norm", # neobert + "model.layers.layers.{bid}.pre_mlp_norm", # plamo2 ), # Post feed-forward norm MODEL_TENSOR.FFN_PRE_NORM: ( "model.layers.{bid}.pre_feedforward_layernorm", # gemma2 + "model.layers.{bid}.pre_ff_layernorm.weight", ), # Post feed-forward norm MODEL_TENSOR.FFN_POST_NORM: ( "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2 "model.layers.{bid}.post_mlp_layernorm", # glm-4-0414 + "model.layers.layers.{bid}.post_mlp_norm.weight", # plamo2 + "model.layers.{bid}.feed_forward.up_proj", ), MODEL_TENSOR.FFN_GATE_INP: ( @@ -296,8 +314,9 @@ class TensorNameMap: "transformer.decoder_layer.{bid}.router", # Grok "transformer.blocks.{bid}.ffn.router.layer", # dbrx "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe - "model.layers.{bid}.feed_forward.router", # llama4 + "model.layers.{bid}.feed_forward.router", # llama4 jamba "encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe + "model.layers.{bid}.mlp.gate.wg", # hunyuan ), MODEL_TENSOR.FFN_GATE_INP_SHEXP: ( @@ -305,7 +324,8 @@ class TensorNameMap: ), MODEL_TENSOR.FFN_EXP_PROBS_B: ( - "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 + "model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1 + "model.layers.{bid}.mlp.moe_statics.e_score_correction", # ernie4.5-moe ), # Feed-forward up @@ -329,6 +349,7 @@ class TensorNameMap: "model.layers.{bid}.mlp.fc1", # phi2 "model.layers.{bid}.mlp.gate_up_proj", # phi3 glm-4-0414 "model.layers.layers.{bid}.mlp.up_proj", # plamo + "model.layers.layers.{bid}.mlp.gate_up_proj", # plamo2 
"model.layers.{bid}.feed_forward.w3", # internlm2 "encoder.layers.{bid}.mlp.fc11", # nomic-bert "encoder.layers.{bid}.mlp.fc1", # nomic-bert-moe @@ -339,23 +360,26 @@ class TensorNameMap: "model.layers.{bid}.residual_mlp.w3", # arctic "encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm "transformer.h.{bid}.mlp.c_fc_1", # exaone - "model.layers.{bid}.feed_forward.up_proj", # llama4 + "model.layers.{bid}.feed_forward.up_proj", # llama4 jamba granite-hybrid + "transformer_encoder.{bid}.ffn.w12", # neobert ), MODEL_TENSOR.FFN_UP_EXP: ( - "layers.{bid}.feed_forward.experts.w3", # mixtral (merged) - "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged) - "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx - "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged) - "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged) - "model.layers.{bid}.feed_forward.experts.up_proj", # llama4 - "encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe + "layers.{bid}.feed_forward.experts.w3", # mixtral (merged) + "transformer.decoder_layer.{bid}.moe.linear_v", # Grok (merged) + "transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx + "model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged) ernie4.5-moe + "model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged) + "model.layers.{bid}.feed_forward.experts.up_proj", # llama4 + "encoder.layers.{bid}.mlp.experts.mlp.w1", # nomic-bert-moe ), MODEL_TENSOR.FFN_UP_SHEXP: ( "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2 "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4 + "model.layers.{bid}.feed_forward.down_proj", + "model.layers.{bid}.mlp.shared_mlp.up_proj", # hunyuan ), # AWQ-activation gate @@ -376,22 +400,23 @@ class TensorNameMap: "transformer.h.{bid}.mlp.linear_1", # refact "model.layers.{bid}.residual_mlp.w1", # arctic "transformer.h.{bid}.mlp.c_fc_0", # exaone - 
"model.layers.{bid}.feed_forward.gate_proj", # llama4 + "model.layers.{bid}.feed_forward.gate_proj", # llama4 jamba granite-hybrid ), MODEL_TENSOR.FFN_GATE_EXP: ( - "layers.{bid}.feed_forward.experts.w1", # mixtral (merged) - "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged) - "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx - "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged) - "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged) - "model.layers.{bid}.feed_forward.experts.gate_proj", # llama4 + "layers.{bid}.feed_forward.experts.w1", # mixtral (merged) + "transformer.decoder_layer.{bid}.moe.linear", # Grok (merged) + "transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx + "model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged) ernie4.5-moe + "model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged) + "model.layers.{bid}.feed_forward.experts.gate_proj", # llama4 ), MODEL_TENSOR.FFN_GATE_SHEXP: ( "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2 "model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4 + "model.layers.{bid}.mlp.shared_mlp.gate_proj", # hunyuan ), # Feed-forward down @@ -421,18 +446,19 @@ class TensorNameMap: "encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2 "encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm "model.layers.h.{bid}.mlp.c_proj", # exaone - "model.layers.{bid}.feed_forward.down_proj", # llama4 + "model.layers.{bid}.feed_forward.down_proj", # llama4 jamba granite-hybrid + "transformer_encoder.{bid}.ffn.w3", # neobert ), MODEL_TENSOR.FFN_DOWN_EXP: ( - "layers.{bid}.feed_forward.experts.w2", # mixtral (merged) - "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged) - "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx - "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged) - "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe - 
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged) - "model.layers.{bid}.feed_forward.experts.down_proj", # llama4 - "encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe + "layers.{bid}.feed_forward.experts.w2", # mixtral (merged) + "transformer.decoder_layer.{bid}.moe.linear_1", # Grok (merged) + "transformer.blocks.{bid}.ffn.experts.mlp.w2", # dbrx + "model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged) ernie4.5-moe + "model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe + "model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged) + "model.layers.{bid}.feed_forward.experts.down_proj", # llama4 + "encoder.layers.{bid}.mlp.experts.mlp.w2", # nomic-bert-moe ), MODEL_TENSOR.FFN_DOWN_SHEXP: ( @@ -440,24 +466,29 @@ class TensorNameMap: "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2 "model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4 "model.layers.{bid}.shared_mlp.output_linear", # granitemoe + "model.layers.{bid}.mlp.shared_mlp.down_proj", # hunyuan ), MODEL_TENSOR.ATTN_Q_NORM: ( "language_model.encoder.layers.{bid}.self_attention.q_layernorm", "model.layers.{bid}.self_attn.q_layernorm", # persimmon + "model.layers.{bid}.self_attn.query_layernorm", # hunyuan "model.layers.{bid}.self_attn.q_norm", # cohere olmoe chameleon olmo2 "transformer.blocks.{bid}.attn.q_ln", # sea-lion "encoder.layer.{bid}.attention.self.layer_norm_q", # jina-bert-v2 "transformer.layers.{bid}.attn.q_norm", # openelm + "model.layers.layers.{bid}.mixer.q", # plamo2 ), MODEL_TENSOR.ATTN_K_NORM: ( "language_model.encoder.layers.{bid}.self_attention.k_layernorm", "model.layers.{bid}.self_attn.k_layernorm", # persimmon + "model.layers.{bid}.self_attn.key_layernorm", # hunyuan "model.layers.{bid}.self_attn.k_norm", # cohere olmoe chameleon olmo2 "transformer.blocks.{bid}.attn.k_ln", # sea-lion "encoder.layer.{bid}.attention.self.layer_norm_k", # jina-bert-v2 "transformer.layers.{bid}.attn.k_norm", # 
openelm + "model.layers.layers.{bid}.mixer.k", # plamo2 ), MODEL_TENSOR.ROPE_FREQS: ( @@ -470,42 +501,145 @@ class TensorNameMap: "encoder.layers.{bid}.norm2", # nomic-bert "transformer.decoder_layer.{bid}.rms_norm_3", # Grok "encoder.layer.{bid}.mlp.layernorm", # jina-bert-v2 - "encoder.layer.{bid}.layer_norm_2" # jina-v2-code + "encoder.layer.{bid}.layer_norm_2", # jina-v2-code + ), + + MODEL_TENSOR.PER_LAYER_TOKEN_EMBD: ( + "model.embed_tokens_per_layer", # gemma3n + ), + + MODEL_TENSOR.PER_LAYER_MODEL_PROJ: ( + "model.per_layer_model_projection", # gemma3n + ), + + MODEL_TENSOR.PER_LAYER_PROJ_NORM: ( + "model.per_layer_projection_norm", # gemma3n + ), + + MODEL_TENSOR.ALTUP_PROJ: ( + "model.altup_projections", # gemma3n + ), + + MODEL_TENSOR.ALTUP_UNEMBD_PROJ: ( + "model.altup_unembed_projections", # gemma3n + ), + + MODEL_TENSOR.PER_LAYER_INP_GATE: ( + "model.layers.{bid}.per_layer_input_gate", # gemma3n + ), + + MODEL_TENSOR.PER_LAYER_PROJ: ( + "model.layers.{bid}.per_layer_projection", # gemma3n + ), + + MODEL_TENSOR.PER_LAYER_POST_NORM: ( + "model.layers.{bid}.post_per_layer_input_norm", # gemma3n + ), + + MODEL_TENSOR.ALTUP_CORRECT_COEF: ( + "model.layers.{bid}.altup.correction_coefs", # gemma3n + ), + + MODEL_TENSOR.ALTUP_CORRECT_SCALE: ( + "model.layers.{bid}.altup.correct_output_scale", # gemma3n + ), + + MODEL_TENSOR.ALTUP_PREDICT_COEF: ( + "model.layers.{bid}.altup.prediction_coefs", # gemma3n + ), + + MODEL_TENSOR.ALTUP_ROUTER: ( + "model.layers.{bid}.altup.modality_router", # gemma3n + ), + + MODEL_TENSOR.ALTUP_ROUTER_NORM: ( + "model.layers.{bid}.altup.router_norm", # gemma3n + ), + + MODEL_TENSOR.LAUREL_L: ( + "model.layers.{bid}.laurel.linear_left", # gemma3n + ), + + MODEL_TENSOR.LAUREL_R: ( + "model.layers.{bid}.laurel.linear_right", # gemma3n + ), + + MODEL_TENSOR.LAUREL_POST_NORM: ( + "model.layers.{bid}.laurel.post_laurel_norm", # gemma3n ), MODEL_TENSOR.SSM_IN: ( - "model.layers.{bid}.in_proj", - "backbone.layers.{bid}.mixer.in_proj", + 
"model.layers.{bid}.in_proj", # mamba-hf + "backbone.layers.{bid}.mixer.in_proj", # mamba + "model.layers.{bid}.mamba.in_proj", # jamba falcon-h1 granite-hybrid + "model.layers.layers.{bid}.mixer.in_proj", # plamo2 ), MODEL_TENSOR.SSM_CONV1D: ( - "model.layers.{bid}.conv1d", - "backbone.layers.{bid}.mixer.conv1d", + "model.layers.{bid}.conv1d", # mamba-hf + "backbone.layers.{bid}.mixer.conv1d", # mamba + "model.layers.{bid}.mamba.conv1d", # jamba falcon-h1 granite-hybrid + "model.layers.layers.{bid}.mixer.conv1d", # plamo2 ), MODEL_TENSOR.SSM_X: ( - "model.layers.{bid}.x_proj", - "backbone.layers.{bid}.mixer.x_proj", + "model.layers.{bid}.x_proj", # mamba-hf + "backbone.layers.{bid}.mixer.x_proj", # mamba + "model.layers.{bid}.mamba.x_proj", # jamba + "model.layers.layers.{bid}.mixer.bcdt_proj", # plamo2 ), MODEL_TENSOR.SSM_DT: ( - "model.layers.{bid}.dt_proj", - "backbone.layers.{bid}.mixer.dt_proj", + "model.layers.{bid}.dt_proj", # mamba-hf + "backbone.layers.{bid}.mixer.dt_proj", # mamba + "model.layers.{bid}.mamba.dt_proj", # jamba falcon-h1 granite-hybrid + "model.layers.layers.{bid}.mixer.dt_proj", # plamo2 + ), + + MODEL_TENSOR.SSM_DT_NORM: ( + "model.layers.{bid}.mamba.dt_layernorm", # jamba ), MODEL_TENSOR.SSM_A: ( - "model.layers.{bid}.A_log", - "backbone.layers.{bid}.mixer.A_log", + "model.layers.{bid}.A_log", # mamba-hf + "backbone.layers.{bid}.mixer.A_log", # mamba + "model.layers.{bid}.mamba.A_log", # jamba falcon-h1 granite-hybrid + "model.layers.layers.{bid}.mixer.A_log", # plamo2 + ), + + MODEL_TENSOR.SSM_B_NORM: ( + "model.layers.{bid}.mamba.b_layernorm", # jamba + "model.layers.{bid}.mamba.B_layernorm", # mini-jamba + "model.layers.layers.{bid}.mixer.B_norm.weight", # plamo2 + ), + + MODEL_TENSOR.SSM_C_NORM: ( + "model.layers.{bid}.mamba.c_layernorm", # jamba + "model.layers.{bid}.mamba.C_layernorm", # mini-jamba + "model.layers.layers.{bid}.mixer.C_norm.weight", # plamo2 ), MODEL_TENSOR.SSM_D: ( - "model.layers.{bid}.D", - 
"backbone.layers.{bid}.mixer.D", + "model.layers.{bid}.D", # mamba-hf + "backbone.layers.{bid}.mixer.D", # mamba + "model.layers.{bid}.mamba.D", # jamba falcon-h1 granite-hybrid + "model.layers.layers.{bid}.mixer.D", # plamo2 + ), + + MODEL_TENSOR.SSM_DT_NORM: ( + "model.layers.layers.{bid}.mixer.dt_norm.weight", # plamo2 + ), + + MODEL_TENSOR.SSM_NORM: ( + "model.layers.{bid}.mamba.norm", # falcon-h1 granite-hybrid + "backbone.layers.{bid}.mixer.norm", # mamba2 ), MODEL_TENSOR.SSM_OUT: ( - "model.layers.{bid}.out_proj", - "backbone.layers.{bid}.mixer.out_proj", + "model.layers.{bid}.out_proj", # mamba-hf + "backbone.layers.{bid}.mixer.out_proj", # mamba + "model.layers.{bid}.mamba.out_proj", # jamba falcon-h1 granite-hybrid + "model.layers.layers.{bid}.mixer.out_proj", # plamo2 ), MODEL_TENSOR.TIME_MIX_W0: ( @@ -832,12 +966,14 @@ class TensorNameMap: # TODO: these do not belong to block_mappings_cfg - move them to mappings_cfg MODEL_TENSOR.ENC_OUTPUT_NORM: ( "encoder.final_layer_norm", # t5 + "layer_norm", # neobert ), MODEL_TENSOR.CLS: ( "classifier", # jina "classifier.dense", # roberta "pre_classifier", # distillbert + "dense", # neobert ), MODEL_TENSOR.CLS_OUT: ( @@ -905,6 +1041,18 @@ class TensorNameMap: "backbone.posnet.{bid}.proj_out", # wavtokenizer ), + MODEL_TENSOR.SHORTCONV_CONV: ( + "model.layers.{bid}.conv.conv", + ), + + MODEL_TENSOR.SHORTCONV_INPROJ: ( + "model.layers.{bid}.conv.in_proj", + ), + + MODEL_TENSOR.SHORTCONV_OUTPROJ: ( + "model.layers.{bid}.conv.out_proj", + ), + ############################################################################# ## Vision encoder diff --git a/gguf-py/gguf/vocab.py b/gguf-py/gguf/vocab.py index cca0979862a71..635fcef35e235 100644 --- a/gguf-py/gguf/vocab.py +++ b/gguf-py/gguf/vocab.py @@ -7,7 +7,10 @@ from pathlib import Path from typing import Any, Callable, Sequence, Mapping, Iterable, Protocol, ClassVar, runtime_checkable -from sentencepiece import SentencePieceProcessor +try: + from sentencepiece import 
SentencePieceProcessor +except ImportError: + SentencePieceProcessor = None import gguf @@ -116,6 +119,7 @@ def _set_special_token(self, typ: str, tid: Any) -> None: logger.warning(f'Special token type {typ}, id {tid} out of range, must be under {self.n_vocab} - skipping') def _try_load_from_tokenizer_json(self, path: Path) -> bool: + tokenizer = None tokenizer_file = path / 'tokenizer.json' if tokenizer_file.is_file(): with open(tokenizer_file, encoding = 'utf-8') as f: @@ -149,15 +153,110 @@ def _try_load_from_tokenizer_json(self, path: Path) -> bool: added_tokens = tokenizer.get('added_tokens', {}) else: added_tokens = {} + tokenizer_config = None tokenizer_config_file = path / 'tokenizer_config.json' - if not tokenizer_config_file.is_file(): + if tokenizer_config_file.is_file(): + with open(tokenizer_config_file, encoding = 'utf-8') as f: + tokenizer_config = json.load(f) + if tokenizer: + special_bos = (tokenizer_config or {}).get('bos_token') + special_cls = (tokenizer_config or {}).get('cls_token') + special_eos = (tokenizer_config or {}).get('eos_token') + special_sep = (tokenizer_config or {}).get('sep_token') + if not special_bos and special_cls and tokenizer_config: + tokenizer_config['bos_token'] = special_bos = special_cls + if not special_eos and special_sep and tokenizer_config: + tokenizer_config['eos_token'] = special_eos = special_sep + if post_processor := tokenizer.get('post_processor'): + for processor in post_processor.get('processors', [post_processor]): + if processor.get('type') == 'RobertaProcessing': + self.add_special_token['bos'] = True + self.add_special_token['eos'] = True + self.add_special_token['sep'] = True + if not special_cls and tokenizer_config: + special_cls = processor.get('cls', [special_bos])[0] + tokenizer_config['cls_token'] = special_cls + if not special_sep and tokenizer_config: + special_sep = processor.get('sep', [special_eos])[0] + tokenizer_config['sep_token'] = special_sep + continue + # Crude parsing of 
TemplateProcessing to determine if BOS/SEP/EOS should be added + # Only works with simple templates, **will** get it wrong on unusual sequences + if processor.get('type') == 'TemplateProcessing': + tmpl_single = processor.get('single', []) + tmpl_pair = processor.get('pair', []) + special_first = None + special_last = None + if len(tmpl_single) > 1: + if special_first := tmpl_single[0].get('SpecialToken', {}).get('id'): + if not tokenizer_config: + special_bos = special_first + self.add_special_token['bos'] = True if special_first in (special_bos, special_cls) else False + if special_first not in (special_bos, special_cls): + logger.warning(f'Unknown leading special token {special_first!r} in TemplateProcessing') + if special_last := tmpl_single[-1].get('SpecialToken', {}).get('id'): + if not tokenizer_config: + special_eos = special_last + elif special_last != special_eos: + if 'eot' not in self.special_token_types: + self.special_token_types = tuple(self.special_token_types) + ('eot', ) + tokenizer_config['eot_token'] = special_eos + elif 'eom' not in self.special_token_types: + self.special_token_types = tuple(self.special_token_types) + ('eom', ) + tokenizer_config['eom_token'] = special_eos + else: + logger.warning(f'Overriding EOS token {special_eos!r} with {special_last!r} without EOT/EOM fallback!') + tokenizer_config['eos_token'] = special_eos = special_last + self.add_special_token['eos'] = True if special_last == special_eos else False + if special_last != special_eos: + logger.warning(f'Unknown trailing special token {special_last!r} in TemplateProcessing') + if tmpl_pair: + seq_start = 1 if special_first and tmpl_pair[0].get('SpecialToken', {}).get('id') == special_first else 0 + seq_stop = -1 if special_last and tmpl_pair[-1].get('SpecialToken', {}).get('id') == special_last else None + if (special_first and seq_start == 0) or (special_last and seq_stop is None): + logger.warning('TemplateProcessing leading/trailing special tokens do not match 
TemplateProcessing') + if tmpl_pair := tmpl_pair[slice(seq_start, seq_stop)]: + tmpl_a = tmpl_pair[0].get('Sequence', {}).get('id') + tmpl_b = tmpl_pair[-1].get('Sequence', {}).get('id') + if tmpl_a != 'A' or tmpl_b != 'B': + logger.warning(f'Unknown sequence {tmpl_a}...{tmpl_b} in TemplateProcessing') + # A [sep] [eos] B + if tmpl_a == 'A' and tmpl_b == 'B' and (tmpl_pair := tmpl_pair[1:-1]): + add_sep = False + if special_entry := tmpl_pair[0].get('SpecialToken', {}).get('id'): + if special_entry in (special_sep, special_eos) and not special_last: + add_sep = True + if special_entry not in (special_sep, special_eos): + logger.warning(f'Unknown separator token {special_entry!r} in TemplateProcessing') + else: + logger.warning(f'Unknown middle sequence {tmpl_pair[0]!r} in TemplateProcessing') + if len(tmpl_pair) == 2: + if special_entry := tmpl_pair[1].get('SpecialToken', {}).get('id'): + if special_entry in (special_sep, special_eos): + add_sep = True + if special_entry not in (special_sep, special_eos): + logger.warning(f'Unknown second separator token {special_entry!r} in TemplateProcessing') + else: + logger.warning(f'Unknown second middle sequence {tmpl_pair[1]!r} in TemplateProcessing') + self.add_special_token['sep'] = add_sep + if add_sep and not special_sep and tokenizer_config: + tokenizer_config['sep_token'] = special_eos + continue + if not tokenizer_config: return True - with open(tokenizer_config_file, encoding = 'utf-8') as f: - tokenizer_config = json.load(f) chat_template_alt = None - chat_template_file = path / 'chat_template.json' - if chat_template_file.is_file(): - with open(chat_template_file, encoding = 'utf-8') as f: + chat_template_json = path / 'chat_template.json' + chat_template_jinja = path / 'chat_template.jinja' + if chat_template_jinja.is_file(): + with open(chat_template_jinja, encoding = 'utf-8') as f: + chat_template_alt = f.read() + if additional_templates := list((path / 'additional_chat_templates').glob('*.jinja')): + 
chat_template_alt = [{'name': 'default', 'template': chat_template_alt}] + for template_path in additional_templates: + with open(template_path, encoding = 'utf-8') as fp: + chat_template_alt.append({'name': template_path.stem, 'template': fp.read()}) + elif chat_template_json.is_file(): + with open(chat_template_json, encoding = 'utf-8') as f: chat_template_alt = json.load(f).get('chat_template') chat_template = tokenizer_config.get('chat_template', chat_template_alt) if chat_template is None or isinstance(chat_template, (str, list)): @@ -302,6 +401,9 @@ class SentencePieceVocab(Vocab): name = "spm" def __init__(self, base_path: Path): + if SentencePieceProcessor is None: + raise RuntimeError("sentencepiece is not installed") + added_tokens: dict[str, int] = {} if (fname_tokenizer := base_path / 'tokenizer.model').exists(): # normal location diff --git a/gguf-py/pyproject.toml b/gguf-py/pyproject.toml index f11351cba1767..0f3a1eeee8304 100644 --- a/gguf-py/pyproject.toml +++ b/gguf-py/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gguf" -version = "0.17.0" +version = "0.17.1" description = "Read and write ML models in GGUF for GGML" authors = ["GGML "] packages = [ @@ -22,7 +22,7 @@ python = ">=3.8" numpy = ">=1.17" tqdm = ">=4.27" pyyaml = ">=5.1" -sentencepiece = ">=0.1.98,<=0.2.0" +sentencepiece = { version = ">=0.1.98,<=0.2.0", optional = true } PySide6 = { version = "^6.9", python = ">=3.9,<3.14", optional = true } [tool.poetry.dev-dependencies] diff --git a/include/llama.h b/include/llama.h index 015a57898e22d..1c3a1cd1b4e7d 100644 --- a/include/llama.h +++ b/include/llama.h @@ -71,52 +71,13 @@ extern "C" { typedef int32_t llama_seq_id; enum llama_vocab_type { - LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab - LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback - LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE - LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece - 
LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram - LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization - }; - - // pre-tokenization types - enum llama_vocab_pre_type { - LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0, - LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1, - LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2, - LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3, - LLAMA_VOCAB_PRE_TYPE_FALCON = 4, - LLAMA_VOCAB_PRE_TYPE_MPT = 5, - LLAMA_VOCAB_PRE_TYPE_STARCODER = 6, - LLAMA_VOCAB_PRE_TYPE_GPT2 = 7, - LLAMA_VOCAB_PRE_TYPE_REFACT = 8, - LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9, - LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10, - LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11, - LLAMA_VOCAB_PRE_TYPE_OLMO = 12, - LLAMA_VOCAB_PRE_TYPE_DBRX = 13, - LLAMA_VOCAB_PRE_TYPE_SMAUG = 14, - LLAMA_VOCAB_PRE_TYPE_PORO = 15, - LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16, - LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17, - LLAMA_VOCAB_PRE_TYPE_VIKING = 18, - LLAMA_VOCAB_PRE_TYPE_JAIS = 19, - LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20, - LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21, - LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, - LLAMA_VOCAB_PRE_TYPE_BLOOM = 23, - LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24, - LLAMA_VOCAB_PRE_TYPE_EXAONE = 25, - LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26, - LLAMA_VOCAB_PRE_TYPE_MINERVA = 27, - LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28, - LLAMA_VOCAB_PRE_TYPE_GPT4O = 29, - LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30, - LLAMA_VOCAB_PRE_TYPE_TRILLION = 31, - LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32, - LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33, - LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34, - LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35, + LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab + LLAMA_VOCAB_TYPE_SPM = 1, // LLaMA tokenizer based on byte-level BPE with byte fallback + LLAMA_VOCAB_TYPE_BPE = 2, // GPT-2 tokenizer based on byte-level BPE + LLAMA_VOCAB_TYPE_WPM = 3, // BERT tokenizer based on WordPiece + LLAMA_VOCAB_TYPE_UGM = 4, // T5 tokenizer based on Unigram + LLAMA_VOCAB_TYPE_RWKV = 5, // RWKV tokenizer based on greedy tokenization + LLAMA_VOCAB_TYPE_PLAMO2 = 6, // PLaMo-2 
tokenizer based on Aho-Corasick with dynamic programming }; enum llama_rope_type { @@ -243,18 +204,21 @@ extern "C" { typedef bool (*llama_progress_callback)(float progress, void * user_data); - // Input data for llama_decode + // Input data for llama_encode/llama_decode // A llama_batch object can contain input about one or many sequences // The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens // // - token : the token ids of the input (used when embd is NULL) // - embd : token embeddings (i.e. float vector of size n_embd) (used when token is NULL) // - pos : the positions of the respective token in the sequence - // (if set to NULL, the token position will be tracked automatically by llama_decode) + // (if set to NULL, the token position will be tracked automatically by llama_encode/llama_decode) // - seq_id : the sequence to which the respective token belongs // (if set to NULL, the sequence ID will be assumed to be 0) // - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output - // (if set to NULL, only the logits for last token will be returned) + // (if set to NULL: + // - if embeddings: all tokens are output + // - if not: only the last token is output + // ) // typedef struct llama_batch { int32_t n_tokens; @@ -262,8 +226,8 @@ extern "C" { llama_token * token; float * embd; llama_pos * pos; - int32_t * n_seq_id; // TODO: remove, should belong to only 1 sequence - llama_seq_id ** seq_id; // TODO: become llama_seq_id * seq_id; + int32_t * n_seq_id; + llama_seq_id ** seq_id; int8_t * logits; // TODO: rename this to "output" } llama_batch; @@ -371,6 +335,9 @@ extern "C" { bool swa_full; // use full-size SWA cache (https://github.com/ggml-org/llama.cpp/pull/13194#issuecomment-2868343055) // NOTE: setting to false when n_seq_max > 1 can cause bad performance in some cases // ref: https://github.com/ggml-org/llama.cpp/pull/13845#issuecomment-2924800573 + bool kv_unified; // use a unified buffer 
across the input sequences when computing the attention + // try to disable when n_seq_max > 1 for improved performance when the sequences do not share a large prefix + // ref: https://github.com/ggml-org/llama.cpp/pull/14363 }; // model quantization parameters @@ -387,6 +354,7 @@ extern "C" { void * imatrix; // pointer to importance matrix data void * kv_overrides; // pointer to vector containing overrides void * tensor_types; // pointer to vector containing tensor types + void * prune_layers; // pointer to vector containing layer indices to prune } llama_model_quantize_params; typedef struct llama_logit_bias { @@ -760,7 +728,7 @@ extern "C" { // - lazily on next llama_decode() // p0 < 0 : [0, p1] // p1 < 0 : [p0, inf) - DEPRECATED(void llama_kv_self_seq_div( + DEPRECATED(LLAMA_API void llama_kv_self_seq_div( struct llama_context * ctx, llama_seq_id seq_id, llama_pos p0, @@ -940,12 +908,14 @@ extern "C" { // Requires the context to have a memory. // For encode-decoder contexts, processes the batch using the decoder. // Positive return values does not mean a fatal error, but rather a warning. 
- // Upon non-zero return values, the memory state is restored to the state before this call + // Upon fatal-error or abort, the ubatches that managed to be processed will remain in the memory state of the context + // To handle this correctly, query the memory state using llama_memory_seq_pos_min() and llama_memory_seq_pos_max() + // Upon other return values, the memory state is restored to the state before this call // 0 - success // 1 - could not find a KV slot for the batch (try reducing the size of the batch or increase the context) - // 2 - aborted + // 2 - aborted (processed ubatches will remain in the context's memory) // -1 - invalid input batch - // < -1 - error + // < -1 - fatal error (processed ubatches will remain in the context's memory) LLAMA_API int32_t llama_decode( struct llama_context * ctx, struct llama_batch batch); @@ -961,8 +931,8 @@ extern "C" { // Get the number of threads used for prompt and batch processing (multiple token). LLAMA_API int32_t llama_n_threads_batch(struct llama_context * ctx); - // Set whether the model is in embeddings mode or not - // If true, embeddings will be returned but logits will not + // Set whether the context outputs embeddings or not + // TODO: rename to avoid confusion with llama_get_embeddings() LLAMA_API void llama_set_embeddings(struct llama_context * ctx, bool embeddings); // Set whether to use causal attention or not @@ -1038,9 +1008,11 @@ extern "C" { LLAMA_API llama_token llama_vocab_sep(const struct llama_vocab * vocab); // sentence separator LLAMA_API llama_token llama_vocab_nl (const struct llama_vocab * vocab); // next-line LLAMA_API llama_token llama_vocab_pad(const struct llama_vocab * vocab); // padding + LLAMA_API llama_token llama_vocab_mask(const struct llama_vocab * vocab); // mask LLAMA_API bool llama_vocab_get_add_bos(const struct llama_vocab * vocab); LLAMA_API bool llama_vocab_get_add_eos(const struct llama_vocab * vocab); + LLAMA_API bool llama_vocab_get_add_sep(const struct
llama_vocab * vocab); LLAMA_API llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab); LLAMA_API llama_token llama_vocab_fim_suf(const struct llama_vocab * vocab); @@ -1084,6 +1056,7 @@ extern "C" { /// @param tokens The tokens pointer must be large enough to hold the resulting tokens. /// @return Returns the number of tokens on success, no more than n_tokens_max /// @return Returns a negative number on failure - the number of tokens that would have been returned + /// @return Returns INT32_MIN on overflow (e.g., tokenization result size exceeds int32_t limit) /// @param add_special Allow to add BOS and EOS tokens if model is configured to do so. /// @param parse_special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated /// as plaintext. Does not insert a leading space. @@ -1421,6 +1394,7 @@ extern "C" { int32_t n_p_eval; int32_t n_eval; + int32_t n_reused; // number of times a ggml compute graph had been reused }; struct llama_perf_sampler_data { diff --git a/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja b/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja new file mode 100644 index 0000000000000..19a3eaee49be6 --- /dev/null +++ b/models/templates/Mistral-Small-3.2-24B-Instruct-2506.jinja @@ -0,0 +1,124 @@ +{%- set today = strftime_now("%Y-%m-%d") %} +{%- set default_system_message = "You are Mistral Small 3, a Large Language Model (LLM) created by Mistral AI, a French startup headquartered in Paris.\nYour knowledge base was last updated on 2023-10-01. The current date is " + today + ".\n\nWhen you're not sure about some information or when the user's request requires up-to-date or specific data, you must use the available tools to fetch the information. Do not hesitate to use tools whenever they can provide a more accurate or complete response. If no relevant tools are available, then clearly state that you don't have the information and avoid making up anything. 
+ +If the user's question is not clear, ambiguous, or does not provide enough context for you to accurately answer the question, you do not try to answer it right away and you rather ask the user to clarify their request (e.g. \"What are some good restaurants around me?\" => \"Where are you?\" or \"When is the next flight to Tokyo\" => \"Where do you travel from?\"). +You are always very attentive to dates, and when asked about information at specific dates, you discard information that is at another date. +You follow these instructions in all languages, and always respond to the user in the language they use or request. +Next sections describe the capabilities that you have. + +# WEB BROWSING INSTRUCTIONS + +You cannot perform any web search or access internet to open URLs, links etc. If it seems like the user is expecting you to do so, you clarify the situation and ask the user to copy paste the text directly in the chat. + +# MULTI-MODAL INSTRUCTIONS + +You have the ability to read images, but you cannot generate images. You also cannot transcribe audio files or videos. +You cannot read nor transcribe audio files or videos. + +# TOOL CALLING INSTRUCTIONS + +You may have access to tools that you can use to fetch information or perform actions. You must use these tools in the following situations: + +1. When the request requires up-to-date information. +2. When the request requires specific data that you do not have in your knowledge base. +3. When the request involves actions that you cannot perform without tools. + +Always prioritize using tools to provide the most accurate and helpful response. If tools are not available, inform the user that you cannot perform the requested action at the moment." 
%} + +{{- bos_token }} + +{%- set system_prompt = default_system_message %} +{%- set loop_messages = messages %} + +{%- if not tools is defined %} + {%- set tools = none %} +{%- endif %} + +{%- if messages|length > 0 and messages[0]['role'] == 'system' %} + {%- if messages[0]['content'] is string %} + {%- set system_prompt = messages[0]['content'] %} + {%- else %} + {%- set system_prompt = messages[0]['content'][0]['text'] %} + {%- endif %} + {%- set loop_messages = messages[1:] %} +{%- endif %} + +{%- set user_messages = loop_messages | selectattr("role", "equalto", "user") | list %} + +{%- set ns = namespace(index=0) %} +{%- for message in loop_messages %} + {%- if not (message.role == "tool" or (message.get('tool_calls'))) %} + {%- if (message["role"] == "user") != (ns.index % 2 == 0) %} + {{- raise_exception("After the optional system message, conversation roles must alternate user/assistant/user/assistant/...") }} + {%- endif %} + {%- set ns.index = ns.index + 1 %} + {%- endif %} +{%- endfor %} + +{{- '[SYSTEM_PROMPT]' + system_prompt + '[/SYSTEM_PROMPT]' }} + +{%- for message in loop_messages %} + {%- if message['role'] == 'system' %} + {%- if message['content'] is string %} + {{- '[SYSTEM_PROMPT]' + message['content'] + '[/SYSTEM_PROMPT]' }} + {%- else %} + {{- '[SYSTEM_PROMPT]' + message['content'][0]['text'] + '[/SYSTEM_PROMPT]' }} + {%- endif %} + {%- elif message['role'] == 'user' %} + {%- if tools is not none and (message == user_messages[-1]) %} + {{- '[AVAILABLE_TOOLS]' + tools|tojson + '[/AVAILABLE_TOOLS]' }} + {%- endif %} + {{- '[INST]' }} + {%- if message['content'] is string %} + {{- message['content'] }} + {%- else %} + {%- for block in message['content'] %} + {%- if block['type'] == 'text' %} + {{- block['text'] }} + {%- elif block['type'] in ['image', 'image_url'] %} + {{- '[IMG]' }} + {%- else %} + {{- raise_exception('Only text and image blocks are supported in message content!') }} + {%- endif %} + {%- endfor %} + {%- endif %} + {{- 
'[/INST]' }} + {%- elif message['role'] == 'assistant' %} + {%- if message.get('tool_calls') %} + {%- for tool_call in message.tool_calls %} + {{- '[TOOL_CALLS]' + tool_call.function.name }} + {%- if not tool_call.id is defined or tool_call.id is not string or tool_call.id|length != 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }} + {%- endif %} + {{- '[CALL_ID]' + tool_call.id }} + {{- '[ARGS]' + tool_call['function']['arguments']|tojson }} + {%- endfor %} + {{- eos_token }} + {%- elif message['content'] is string %} + {{- message['content'] + eos_token }} + {%- else %} + {%- for block in message['content'] %} + {%- if block['type'] == 'text' %} + {{- block['text'] }} + {%- elif block['type'] in ['image', 'image_url'] %} + {{- '[IMG]' }} + {%- else %} + {{- raise_exception('Only text and image blocks are supported in assistant content!') }} + {%- endif %} + {%- endfor %} + {{- eos_token }} + {%- endif %} + {%- elif message['role'] == 'tool_results' or message['role'] == 'tool' %} + {%- if message.content is defined and message.content.content is defined %} + {%- set content = message.content.content %} + {%- else %} + {%- set content = message.content %} + {%- endif %} + {%- if not message.tool_call_id is defined or message.tool_call_id is not string or message['tool_call_id']|length != 9 %} + {{- raise_exception("Tool call IDs should be alphanumeric strings with length 9!") }} + {%- endif %} + {{- '[TOOL_RESULTS]' + message.tool_call_id + '[TOOL_CONTENT]' + content|string + '[/TOOL_RESULTS]' }} + {%- else %} + {{- raise_exception('Only system, user, assistant, and tool roles are supported!') }} + {%- endif %} +{%- endfor %} diff --git a/models/templates/llama-cpp-rwkv-world.jinja b/models/templates/llama-cpp-rwkv-world.jinja new file mode 100644 index 0000000000000..690223f1b03fe --- /dev/null +++ b/models/templates/llama-cpp-rwkv-world.jinja @@ -0,0 +1,34 @@ +{%- if not add_generation_prompt is defined -%} + {%- set 
add_generation_prompt = true -%} +{%- endif -%} +{%- set ns = namespace(system_prompt='') -%} +{%- for message in messages -%} + {%- if message['role'] == 'system' -%} + {%- set ns.system_prompt = message['content'] -%} + {%- endif -%} +{%- endfor -%} +{{bos_token}} +{%- if ns.system_prompt != '' -%} +{{- 'System: ' + ns.system_prompt + '\n\n' -}} +{%- endif -%} +{%- for message in messages -%} + {%- if message['role'] == 'user' -%} + {{- 'User: ' + message['content']|trim + '\n\n' -}} + {%- endif -%} + {%- if message['role'] == 'assistant' and message['content'] is not none -%} + {%- set content = message['content'] -%} + {%- if '</think>' in content -%}
+ {%- set content = content.split('</think>')[-1] -%} + {%- endif -%} + {{- 'Assistant: ' + content|trim + '\n\n' -}} + {%- endif -%} +{%- endfor -%} +{%- if add_generation_prompt -%} + {{- 'Assistant:' -}} + {%- if enable_thinking is defined and enable_thinking is false %} + {{- ' <think>\n</think>' }} + {%- endif %} + {%- if enable_thinking is defined and enable_thinking is true %} + {{- ' <think>' }} + {%- endif %} +{%- endif -%} \ No newline at end of file diff --git a/models/templates/moonshotai-Kimi-K2.jinja b/models/templates/moonshotai-Kimi-K2.jinja new file mode 100644 index 0000000000000..ecb49a210852c --- /dev/null +++ b/models/templates/moonshotai-Kimi-K2.jinja @@ -0,0 +1,43 @@ +{%- if tools -%} + <|im_system|>tool_declare<|im_middle|>{{ tools | tojson }}<|im_end|> +{%- endif -%} +{%- for message in messages -%} + {%- if loop.first and messages[0]['role'] != 'system' -%} + <|im_system|>system<|im_middle|>You are a helpful assistant<|im_end|> + {%- endif -%} + {%- if message['role'] == 'system' -%} + <|im_system|>system<|im_middle|> + {%- elif message['role'] == 'user' -%} + <|im_user|>user<|im_middle|> + {%- elif message['role'] == 'assistant' -%} + <|im_assistant|>assistant<|im_middle|> + {%- elif message['role'] == 'tool' -%} + <|im_system|>tool<|im_middle|> + {%- endif -%} + {%- if message['role'] == 'assistant' and message.get('tool_calls') -%} + {%- if message['content'] -%}{{ message['content'] }}{%- endif -%} + <|tool_calls_section_begin|> + {%- for tool_call in message['tool_calls'] -%} + {%- set func_name = tool_call['function']['name'] -%} + {%- set formatted_id = 'functions.'
+ func_name + ':' + loop.index0|string -%} + <|tool_call_begin|>{{ formatted_id }}<|tool_call_argument_begin|>{{ tool_call['function']['arguments'] | tojson}}<|tool_call_end|> + {%- endfor -%} + <|tool_calls_section_end|> + {%- elif message['role'] == 'tool' -%} + ## Return of {{ message.tool_call_id }}\n{{ message['content'] }} + {%- elif message['content'] is string -%} + {{ message['content'] }} + {%- elif message['content'] is not none -%} + {% for content in message['content'] -%} + {% if content['type'] == 'image' or 'image' in content or 'image_url' in content -%} + <|media_start|>image<|media_content|><|media_pad|><|media_end|> + {% else -%} + {{ content['text'] }} + {%- endif -%} + {%- endfor -%} + {%- endif -%} + <|im_end|> +{%- endfor -%} +{%- if add_generation_prompt -%} + <|im_assistant|>assistant<|im_middle|> +{%- endif -%} diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index 9fa7d4d0abdec..56b6752ac0645 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -3,6 +3,7 @@ -r ../tools/server/tests/requirements.txt -r ./requirements-compare-llama-bench.txt +-r ./requirements-server-bench.txt -r ./requirements-pydantic.txt -r ./requirements-test-tokenizer-random.txt diff --git a/requirements/requirements-server-bench.txt b/requirements/requirements-server-bench.txt new file mode 100644 index 0000000000000..ea5849fa104ef --- /dev/null +++ b/requirements/requirements-server-bench.txt @@ -0,0 +1,5 @@ +datasets~=3.2.0 +matplotlib~=3.10.0 +numpy~=1.26.4 +requests~=2.32.3 +tqdm~=4.67.1 diff --git a/scripts/apple/validate-apps.sh b/scripts/apple/validate-apps.sh index a571aa6fcf582..f0475758c37ab 100755 --- a/scripts/apple/validate-apps.sh +++ b/scripts/apple/validate-apps.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash ./scripts/apple/validate-ios.sh ./scripts/apple/validate-macos.sh ./scripts/apple/validate-visionos.sh diff --git a/scripts/apple/validate-ios.sh 
b/scripts/apple/validate-ios.sh index 7bda1b9729978..50800d84a0c1d 100755 --- a/scripts/apple/validate-ios.sh +++ b/scripts/apple/validate-ios.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-ios.sh - Validate iOS Application with embedded llama.xcframework using SwiftUI # Authentication options (optional) (can be set via environment variables) diff --git a/scripts/apple/validate-macos.sh b/scripts/apple/validate-macos.sh index 6dc28e694943b..fa800ee682027 100755 --- a/scripts/apple/validate-macos.sh +++ b/scripts/apple/validate-macos.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-macos.sh - Validate macOS Application with embedded llama.xcframework using SwiftUI # Authentication options (optional) (can be set via environment variables) diff --git a/scripts/apple/validate-tvos.sh b/scripts/apple/validate-tvos.sh index 6120189e84b28..b4da698749c58 100755 --- a/scripts/apple/validate-tvos.sh +++ b/scripts/apple/validate-tvos.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-tvos.sh - Validate tvOS Application with embedded llama.xcframework using SwiftUI # Authentication options (optional) (can be set via environment variables) diff --git a/scripts/apple/validate-visionos.sh b/scripts/apple/validate-visionos.sh index a18ddcce4a0b2..bbdec6602679c 100755 --- a/scripts/apple/validate-visionos.sh +++ b/scripts/apple/validate-visionos.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # validate-visionos.sh - Validate visionOS Application with embedded llama.xcframework using SwiftUI # Authentication options (optional) (can be set via environment variables) diff --git a/scripts/check-requirements.sh b/scripts/check-requirements.sh index 4c3b05f68b7ba..da2357d76c7a6 100755 --- a/scripts/check-requirements.sh +++ b/scripts/check-requirements.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail # diff --git a/scripts/ci-run.sh b/scripts/ci-run.sh index 06b5d9c6e5949..5877a7edab166 100755 --- 
#!/usr/bin/env python3

"""
This script parses docs/ops/*.csv and creates the ops.md, which is a table documenting supported operations on various ggml backends.
"""
import csv
import logging
import sys
from pathlib import Path
from collections import defaultdict


class DocsGenerator:
    """Builds a markdown table of per-backend operation support from CSV files.

    The CSV files are read from ``<ggml_root>/docs/ops/*.csv`` and the table is
    written to ``<ggml_root>/docs/<output_filename>``.
    """

    # Operation names that mark a failed test run rather than a real operation.
    _ERROR_OPERATIONS = ("CONTEXT_ERROR", "BUILD_ERROR")

    def __init__(self, ggml_root: str, output_filename: str = "ops.md"):
        """
        Args:
            ggml_root: Repository root that contains the ``docs`` directory.
            output_filename: Name of the markdown file created inside ``docs``.
        """
        self.ggml_root = Path(ggml_root)
        self.ops_dir = self.ggml_root / "docs" / "ops"
        self.output_filename = output_filename
        # backend -> operation -> one bool per parsed "support" row
        self.backend_support: dict[str, dict[str, list[bool]]] = defaultdict(
            lambda: defaultdict(list)
        )
        self.all_operations: set[str] = set()
        self.all_backends: set[str] = set()
        self.logger = logging.getLogger(__name__)

    def parse_support_files(self) -> None:
        """Parse every ``*.csv`` file in the ops directory (no-op if it is missing)."""
        if not self.ops_dir.exists():
            self.logger.warning(f"ops directory not found: {self.ops_dir}")
            return

        self.logger.info(f"Parsing support files from {self.ops_dir}...")

        for support_file in self.ops_dir.glob("*.csv"):
            self.logger.info(f" Reading: {support_file.name}")
            self._parse_support_file(support_file)

    def _parse_support_file(self, file_path: Path) -> None:
        """Record the support rows of one CSV file.

        Only rows whose ``test_mode`` column equals ``support`` are used. Any
        error while reading the file is logged and otherwise ignored so that one
        broken file does not abort the whole run.
        """
        try:
            with open(file_path, "r", newline='') as f:
                reader = csv.DictReader(f)

                for row in reader:
                    # Skip rows that don't have support mode
                    if row.get('test_mode') != 'support':
                        continue

                    backend_name = row.get('backend_name', '').strip()
                    operation = row.get('op_name', '').strip()
                    supported_str = row.get('error_message', '').strip()  # "yes" or "no"
                    backend_reg_name = row.get('backend_reg_name', '').strip()

                    # Skip invalid or error operations
                    if not operation or not backend_name or operation in self._ERROR_OPERATIONS:
                        continue

                    is_supported = supported_str.lower() == "yes"

                    # Use backend_reg_name for grouping, fallback to backend_name
                    backend_key = backend_reg_name if backend_reg_name else backend_name

                    self.all_backends.add(backend_key)
                    self.backend_support[backend_key][operation].append(is_supported)
                    self.all_operations.add(operation)

        except Exception as e:
            self.logger.error(f" Error parsing {file_path}: {e}")

    def _support_list(self, backend: str, operation: str) -> list[bool]:
        """Return the recorded results for (backend, operation) without inserting
        new entries into the nested defaultdict."""
        return self.backend_support.get(backend, {}).get(operation, [])

    def get_backend_support_status(self, backend: str, operation: str) -> str:
        """Classify how well ``backend`` supports ``operation``.

        Returns:
            ``"supported"`` if every recorded row is positive,
            ``"partially supported"`` if only some are, and ``"unsupported"`` if
            none are or nothing was recorded.
        """
        support_list = self._support_list(backend, operation)

        if not support_list:
            return "unsupported"
        if all(support_list):
            return "supported"
        if any(support_list):
            return "partially supported"
        return "unsupported"

    def get_support_status(self, operation: str) -> str:
        """Classify the support of ``operation`` across all known backends.

        A backend counts as supporting the operation when at least one of its
        recorded rows is positive. (Previously any non-empty result list counted,
        so a backend that reported only "no" was treated as supporting it.)
        """
        if operation not in self.all_operations:
            return "unsupported"

        total_backends = len(self.all_backends)
        support_count = sum(
            1
            for backend in self.all_backends
            if any(self._support_list(backend, operation))
        )

        if support_count == 0:
            return "unsupported"
        elif support_count == total_backends:
            return "supported"
        else:
            return "partially supported"

    def get_support_symbol(self, status: str) -> str:
        """Map a status string to its table symbol (``❓`` for unknown statuses)."""
        symbols = {"supported": "✅", "partially supported": "🟡", "unsupported": "❌"}
        return symbols.get(status, "❓")

    def generate_markdown(self) -> str:
        """Render the support table, one row per operation and one column per backend."""
        lines = []

        lines.append("# GGML Operations")
        lines.append("")
        lines.append("List of GGML operations and backend support status.")
        lines.append("")
        lines.append("Legend:")
        lines.append("- ✅ Fully supported by this backend")
        lines.append("- 🟡 Partially supported by this backend")
        lines.append("- ❌ Not supported by this backend")
        lines.append("")

        backends = sorted(self.all_backends)
        header = "| Operation |"
        for backend in backends:
            header += f" {backend} |"

        separator = "|-----------|"
        for _ in backends:
            separator += "------|"

        lines.append(header)
        lines.append(separator)

        for operation in sorted(self.all_operations):
            row = f"| {operation:>32} |"

            for backend in backends:
                status = self.get_backend_support_status(backend, operation)
                # Single source of truth for the status -> symbol mapping.
                row += f" {self.get_support_symbol(status)} |"

            lines.append(row)

        lines.append("")

        return "\n".join(lines)

    def run(self) -> None:
        """Parse the CSV files and write the markdown table to the docs directory."""
        self.logger.info("Parsing GGML operation support files...")
        self.parse_support_files()

        if not self.all_operations:
            self.logger.error(
                "No operations found. Make sure to run test-backend-ops support --output csv > docs/ops/file.csv first."
            )
            return

        self.logger.info(
            f"Found {len(self.all_operations)} operations across {len(self.all_backends)} backends"
        )

        self.logger.info("Generating markdown...")
        markdown_content = self.generate_markdown()

        docs_dir = self.ggml_root / "docs"
        docs_dir.mkdir(exist_ok=True)

        ops_file = docs_dir / self.output_filename
        # The table contains emoji; force UTF-8 so the write does not depend on
        # the platform's default encoding.
        with open(ops_file, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        self.logger.info(f"Generated: {ops_file}")
        self.logger.info(f"Operations: {len(self.all_operations)}")
        self.logger.info(f"Backends: {len(self.all_backends)}")


def main():
    """Entry point: the optional first CLI argument overrides the output file name."""
    logging.basicConfig(level=logging.INFO)

    if len(sys.argv) > 1:
        output_filename = sys.argv[1]
    else:
        output_filename = "ops.md"

    generator = DocsGenerator(".", output_filename)
    generator.run()


if __name__ == "__main__":
    main()
b/scripts/get-wikitext-103.sh index 9c65fafbcc50b..244a371baddc6 100755 --- a/scripts/get-wikitext-103.sh +++ b/scripts/get-wikitext-103.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash wget https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-raw-v1.zip diff --git a/scripts/get-wikitext-2.sh b/scripts/get-wikitext-2.sh index 5f3845ef59a9e..67b0b0118b41c 100755 --- a/scripts/get-wikitext-2.sh +++ b/scripts/get-wikitext-2.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash wget https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip unzip wikitext-2-raw-v1.zip diff --git a/scripts/get-winogrande.sh b/scripts/get-winogrande.sh index f1fc0e2d47adb..2b48b11756647 100755 --- a/scripts/get-winogrande.sh +++ b/scripts/get-winogrande.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash wget https://huggingface.co/datasets/ikawrakow/winogrande-eval-for-llama.cpp/raw/main/winogrande-debiased-eval.csv diff --git a/scripts/hf.sh b/scripts/hf.sh index b251925fa453f..e41b9053afdf2 100755 --- a/scripts/hf.sh +++ b/scripts/hf.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Shortcut for downloading HF models # diff --git a/scripts/qnt-all.sh b/scripts/qnt-all.sh index bc43738a2f498..dc04670dff55b 100755 --- a/scripts/qnt-all.sh +++ b/scripts/qnt-all.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash qnt=(q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k) args="" diff --git a/scripts/run-all-perf.sh b/scripts/run-all-perf.sh index 6384e364d5584..b7de764ff83bf 100755 --- a/scripts/run-all-perf.sh +++ b/scripts/run-all-perf.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash qnt=(f16 q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k) args="-ngl 999 -n 64 -p 512" diff --git a/scripts/run-all-ppl.sh b/scripts/run-all-ppl.sh index e15f74f1b666d..918ecda27913d 100755 --- a/scripts/run-all-ppl.sh +++ b/scripts/run-all-ppl.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash qnt=(f16 q8_0 q6_k q5_k q5_1 q5_0 q4_k q4_1 q4_0 q3_k q2_k) 
#!/usr/bin/env python3

import argparse
import json
import os
import random
import subprocess
from time import sleep, time
from typing import Optional, Union

import datasets
import logging
import matplotlib.pyplot as plt
import numpy as np
import requests
from tqdm.contrib.concurrent import thread_map


logging.basicConfig(level=logging.INFO, format='%(message)s')
logger = logging.getLogger("server-bench")


def get_prompts_text(dataset_name: str, n_prompts: int) -> Optional[list[str]]:
    """Load text prompts from a named dataset.

    Returns None when ``dataset_name`` is not a known dataset (only "mmlu" is
    recognized). A negative ``n_prompts`` keeps all prompts.
    """
    ret = []
    if dataset_name.lower() == "mmlu":
        logger.info("Loading MMLU dataset...")
        ret = datasets.load_dataset("cais/mmlu", "all")["test"]["question"]  # type: ignore
    else:
        return None
    if n_prompts >= 0:
        ret = ret[:n_prompts]
    return ret


def get_prompt_lengths_rng(n_prompts: int, prompt_length_min: int, prompt_length_max: int) -> list[int]:
    """Return ``n_prompts`` reproducible random lengths in [prompt_length_min, prompt_length_max]."""
    assert n_prompts >= 0
    ret: list[int] = []
    for i in range(n_prompts):
        # Re-seed per prompt so that prompt i gets the same length regardless of n_prompts.
        random.seed(13 * i + 0)
        ret.append(random.randint(prompt_length_min, prompt_length_max))
    return ret


def get_prompts_rng(prompt_lengths: list[int]) -> list[list[int]]:
    """Return one list of random token ids (each in [100, 10000]) per requested length."""
    return [[random.randint(100, 10000) for _ in range(pl)] for pl in prompt_lengths]


def _stop_server(process: subprocess.Popen, fout) -> None:
    """Terminate the server process, wait for it, and close its log file (if any)."""
    process.terminate()
    process.wait()
    # subprocess.DEVNULL is a plain int sentinel, only real file objects need closing.
    if not isinstance(fout, int):
        fout.close()


def get_server(path_server: str, path_log: Optional[str]) -> dict:
    """Start the llama.cpp server and block until its /health endpoint returns 200.

    Host and port are taken from LLAMA_ARG_HOST / LLAMA_ARG_PORT. Server output
    goes to ``path_log``, or is discarded when ``path_log`` is None.

    Returns:
        dict with keys "process" (Popen), "address" (base URL) and "fout"
        (log file object or subprocess.DEVNULL).

    Raises:
        RuntimeError: if the server exits early or cannot be reached 10 times.
            In that case the process is stopped and the log file is closed
            before the error propagates.
    """
    logger.info("Starting the llama.cpp server...")
    hostname: str = os.environ.get("LLAMA_ARG_HOST", "127.0.0.1")
    port: str = os.environ.get("LLAMA_ARG_PORT", "8080")
    address: str = f"http://{hostname}:{port}"

    fout = open(path_log, "w") if path_log is not None else subprocess.DEVNULL
    try:
        process = subprocess.Popen([path_server], stdout=fout, stderr=subprocess.STDOUT)
    except BaseException:
        # Popen failed (e.g. binary not found): do not leak the log file handle.
        if not isinstance(fout, int):
            fout.close()
        raise

    try:
        n_failures: int = 0
        while True:
            try:
                sleep(1.0)
                exit_code = process.poll()
                if exit_code is not None:
                    raise RuntimeError(f"llama.cpp server exited unexpectedly with exit code {exit_code}, see {path_log}")
                response = requests.get(f"{address}/health")
                if response.status_code == 200:
                    break
            except requests.ConnectionError:
                n_failures += 1
                if n_failures >= 10:
                    raise RuntimeError("llama.cpp server is not healthy after 10 seconds")
    except BaseException:
        # The caller never receives the handle on failure, so clean up here.
        _stop_server(process, fout)
        raise

    return {"process": process, "address": address, "fout": fout}


def get_prompt_length(data: dict) -> int:
    """Return the number of tokens of ``data["prompt"]`` after the chat template is applied.

    Uses the server's /apply-template and /tokenize endpoints.

    Raises:
        RuntimeError: if either request does not return status 200.
    """
    session = data["session"]
    server_address: str = data["server_address"]

    response = session.post(
        f"{server_address}/apply-template",
        json={"messages": [{"role": "user", "content": data["prompt"], "stream": True}]}
    )
    if response.status_code != 200:
        raise RuntimeError(f"Server returned status code {response.status_code}: {response.text}")
    prompt: str = json.loads(response.text)["prompt"]
    response = session.post(
        f"{server_address}/tokenize",
        json={"content": prompt, "add_special": True}
    )
    if response.status_code != 200:
        raise RuntimeError(f"Server returned status code {response.status_code}: {response.text}")
    tokens: list = json.loads(response.text)["tokens"]
    return len(tokens)


def send_prompt(data: dict) -> tuple[float, list[float]]:
    """Send one completion request and record when each streamed chunk arrives.

    Returns:
        (submission time, arrival times of the streamed "data: " lines with the
        last one dropped).

    Raises:
        RuntimeError: if the server does not return status 200.
    """
    session = data["session"]
    server_address: str = data["server_address"]

    t_submit = time()
    if data["synthetic_prompt"]:
        json_data: dict = {
            "prompt": data["prompt"], "ignore_eos": True, "cache_prompt": False,
            "seed": data["seed"], "n_predict": data["n_predict"], "stream": True}
    else:
        response = session.post(
            f"{server_address}/apply-template",
            json={"messages": [{"role": "user", "content": data["prompt"], "stream": True}]}
        )
        if response.status_code != 200:
            raise RuntimeError(f"Server returned status code {response.status_code}: {response.text}")
        prompt: str = json.loads(response.text)["prompt"]

        json_data = {"prompt": prompt, "seed": data["seed"], "n_predict": data["n_predict"], "stream": True}

    response = session.post(f"{server_address}/completion", json=json_data, stream=True)
    # Check the status before consuming the stream: once iter_lines() has drained
    # the body, response.text is no longer readable and the real error is lost.
    if response.status_code != 200:
        raise RuntimeError(f"Server returned status code {response.status_code}: {response.text}")

    token_arrival_times: list[float] = []
    for line in response.iter_lines(decode_unicode=False):
        if not line.startswith(b"data: "):
            continue
        token_arrival_times.append(time())
    token_arrival_times = token_arrival_times[:-1]

    return (t_submit, token_arrival_times)


def benchmark(path_server: str, path_log: Optional[str], prompt_source: str, n_prompts: int, n_predict: int, n_predict_min: int):
    """Run the benchmark: start the server, send all prompts in parallel, print statistics
    and save the plots "prompt_time.png" and "gen_rate.png" to the working directory.

    Unset LLAMA_ARG_* environment variables (N_PARALLEL, N_GPU_LAYERS, FLASH_ATTN,
    CTX_SIZE) are given defaults before the server is started.
    """
    if os.environ.get("LLAMA_ARG_N_PARALLEL") is None:
        logger.info("LLAMA_ARG_N_PARALLEL not explicitly set, using 32")
        os.environ["LLAMA_ARG_N_PARALLEL"] = "32"
    if os.environ.get("LLAMA_ARG_N_GPU_LAYERS") is None:
        logger.info("LLAMA_ARG_N_GPU_LAYERS not explicitly set, using 999")
        os.environ["LLAMA_ARG_N_GPU_LAYERS"] = "999"
    if os.environ.get("LLAMA_ARG_FLASH_ATTN") is None:
        logger.info("LLAMA_ARG_FLASH_ATTN not explicitly set, using 'true'")
        os.environ["LLAMA_ARG_FLASH_ATTN"] = "true"

    parallel: int = int(os.environ.get("LLAMA_ARG_N_PARALLEL", 1))
    prompts: Union[None, list[str], list[list[int]]] = get_prompts_text(prompt_source, n_prompts)
    synthetic_prompts: bool = prompts is None
    prompt_n = []

    if synthetic_prompts:
        prompt_source_split: list[str] = prompt_source.split("-")
        assert len(prompt_source_split) == 3
        assert prompt_source_split[0].lower() == "rng"
        prompt_length_min: int = int(prompt_source_split[1])
        prompt_length_max: int = int(prompt_source_split[2])
        logger.info("Generating random prompts...")
        prompt_n = get_prompt_lengths_rng(n_prompts, prompt_length_min, prompt_length_max)
        prompts = get_prompts_rng(prompt_n)
    else:
        n_predict_min = n_predict

    if os.environ.get("LLAMA_ARG_CTX_SIZE") is None:
        context_per_slot: int = int(1.05 * (n_predict + (np.max(prompt_n) if synthetic_prompts else 2048)))
        context_total: int = context_per_slot * parallel
        os.environ["LLAMA_ARG_CTX_SIZE"] = str(context_total)
        logger.info(f"LLAMA_ARG_CTX_SIZE not explicitly set, using {context_total} ({context_per_slot} per slot).")

    server: Optional[dict] = None
    session = None
    try:
        server = get_server(path_server, path_log)
        server_address: str = server["address"]

        adapter = requests.adapters.HTTPAdapter(pool_connections=parallel, pool_maxsize=parallel)  # type: ignore
        session = requests.Session()
        session.mount("http://", adapter)
        session.mount("https://", adapter)

        data: list[dict] = []

        for i, p in enumerate(prompts):
            random.seed(13 * i + 1)
            data.append({
                "session": session, "server_address": server_address, "prompt": p, "synthetic_prompt": synthetic_prompts,
                "n_predict": random.randint(n_predict_min, n_predict), "seed": 13 * i + 2})

        if not synthetic_prompts:
            logger.info("Getting the prompt lengths...")
            prompt_n = [get_prompt_length(d) for d in data]

        logger.info("Starting the benchmark...\n")
        t0 = time()
        results: list[tuple[float, list[float]]] = thread_map(send_prompt, data, max_workers=parallel, chunksize=1)
    finally:
        if server is not None:
            # Also closes the server log file, which was previously left open.
            _stop_server(server["process"], server["fout"])
        if session is not None:
            session.close()

    # Number of requests actually sent; may differ from n_prompts for text
    # datasets (negative n_prompts means "all", or the dataset may be smaller).
    n_requests: int = len(results)

    prompt_t = []
    token_t = []
    depth_sum: int = 0
    for pn, (t_submit, tat) in zip(prompt_n, results):
        prompt_t.append(tat[0] - t_submit)
        token_t += tat
        n_tokens: int = len(tat)
        depth_sum += n_tokens * pn
        depth_sum += n_tokens * (n_tokens + 1) // 2
    assert len(token_t) > 0
    prompt_n = np.array(prompt_n, dtype=np.int64)
    prompt_t = np.array(prompt_t, dtype=np.float64)
    token_t = np.array(token_t, dtype=np.float64)

    token_t -= t0
    token_t_last = np.max(token_t)

    logger.info("")
    logger.info(f"Benchmark duration: {token_t_last:.2f} s")
    logger.info(f"Request throughput: {n_requests / token_t_last:.2f} requests/s = {n_requests / (token_t_last/60):.2f} requests/min")
    logger.info(f"Total prompt length: {np.sum(prompt_n)} tokens")
    logger.info(f"Average prompt length: {np.mean(prompt_n):.2f} tokens")
    logger.info(f"Average prompt latency: {1e3 * np.mean(prompt_t):.2f} ms")
    logger.info(f"Average prompt speed: {np.sum(prompt_n) / np.sum(prompt_t):.2f} tokens/s")
    logger.info(f"Total generated tokens: {token_t.shape[0]}")
    logger.info(f"Average generation depth: {depth_sum / token_t.shape[0]:.2f} tokens")
    logger.info(f"Average total generation speed: {token_t.shape[0] / token_t_last:.2f} tokens/s")
    logger.info(f"Average generation speed per slot: {token_t.shape[0] / (parallel * token_t_last):.2f} tokens/s / slot")
    logger.info("")
    logger.info(
        "The above numbers are the speeds as observed by the Python script and may differ from the performance reported by the server, "
        "particularly when the server is fast vs. the network or Python script (e.g. when serving a very small model).")

    plt.figure()
    plt.scatter(prompt_n, 1e3 * prompt_t, s=10.0, marker=".", alpha=0.25)
    plt.xlim(0, 1.05e0 * np.max(prompt_n))
    plt.ylim(0, 1.05e3 * np.max(prompt_t))
    plt.xlabel("Prompt length [tokens]")
    plt.ylabel("Time to first token [ms]")
    plt.savefig("prompt_time.png", dpi=240)

    bin_max = np.ceil(token_t_last) + 1
    plt.figure()
    plt.hist(token_t, np.arange(0, bin_max))
    plt.xlim(0, bin_max + 1)
    plt.xlabel("Time [s]")
    plt.ylabel("Num. tokens generated per second")
    plt.savefig("gen_rate.png", dpi=240)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Tool for benchmarking the throughput of the llama.cpp HTTP server. "
        "Results are printed to console and visualized as plots (saved to current working directory). "
        "To pass arguments such as the model path to the server, set the corresponding environment variables (see llama-server --help).")
    parser.add_argument("--path_server", type=str, default="llama-server", help="Path to the llama.cpp server binary")
    parser.add_argument("--path_log", type=str, default="server-bench.log", help="Path to the file the server output (stdout and stderr) is written to")
    parser.add_argument(
        "--prompt_source", type=str, default="rng-1024-2048",
        help="How to get the prompts for the benchmark, either 'mmlu' for MMLU questions or "
        "rng-MIN-MAX for synthetic prompts with random lengths in the interval [MIN, MAX]")
    parser.add_argument("--n_prompts", type=int, default=100, help="Number of prompts to evaluate")
    parser.add_argument("--n_predict", type=int, default=2048, help="Max. number of tokens to predict per prompt")
    parser.add_argument(
        "--n_predict_min", type=int, default=1024,
        help="Min. number of tokens to predict per prompt (supported for synthetic prompts only)")
    args = parser.parse_args()
    benchmark(**vars(args))
's/([[:space:]]| [ab]\/)src\/ggml-cuda\//\1ggml\/src\/ggml-cuda\//g' \ -e 's/([[:space:]]| [ab]\/)src\/ggml-hip\//\1ggml\/src\/ggml-hip\//g' \ - -e 's/([[:space:]]| [ab]\/)src\/ggml-kompute\//\1ggml\/src\/ggml-kompute\//g' \ -e 's/([[:space:]]| [ab]\/)src\/ggml-metal\//\1ggml\/src\/ggml-metal\//g' \ -e 's/([[:space:]]| [ab]\/)src\/ggml-opencl\//\1ggml\/src\/ggml-opencl\//g' \ -e 's/([[:space:]]| [ab]\/)src\/ggml-rpc\//\1ggml\/src\/ggml-rpc\//g' \ diff --git a/scripts/sync-ggml.last b/scripts/sync-ggml.last index 914fe47ff6a34..ca009adb83bed 100644 --- a/scripts/sync-ggml.last +++ b/scripts/sync-ggml.last @@ -1 +1 @@ -6a7d170c04789f6ebcf320ed03c1b16973f93bd7 +d62df60a07ba3deeb85e5cfc9b1ee07645ff35e2 diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh index aa1a46b4bfccd..9b98329e09cb6 100755 --- a/scripts/sync-ggml.sh +++ b/scripts/sync-ggml.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash cp -rpv ../ggml/CMakeLists.txt ./ggml/CMakeLists.txt cp -rpv ../ggml/src/CMakeLists.txt ./ggml/src/CMakeLists.txt @@ -15,7 +15,6 @@ cp -rpv ../ggml/src/ggml-cann/* ./ggml/src/ggml-cann/ cp -rpv ../ggml/src/ggml-cpu/* ./ggml/src/ggml-cpu/ cp -rpv ../ggml/src/ggml-cuda/* ./ggml/src/ggml-cuda/ cp -rpv ../ggml/src/ggml-hip/* ./ggml/src/ggml-hip/ -cp -rpv ../ggml/src/ggml-kompute/* ./ggml/src/ggml-kompute/ cp -rpv ../ggml/src/ggml-metal/* ./ggml/src/ggml-metal/ cp -rpv ../ggml/src/ggml-musa/* ./ggml/src/ggml-musa/ cp -rpv ../ggml/src/ggml-opencl/* ./ggml/src/ggml-opencl/ diff --git a/scripts/tool_bench.sh b/scripts/tool_bench.sh index 6c7616a88fe5b..05b41d2f1fafb 100755 --- a/scripts/tool_bench.sh +++ b/scripts/tool_bench.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -euo pipefail cmake --build build -j diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 70be604e4b0d3..8f9cd652447ab 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -22,8 +22,9 @@ add_library(llama llama-io.cpp llama-kv-cache-unified.cpp llama-kv-cache-unified-iswa.cpp - 
llama-kv-cache-recurrent.cpp llama-memory.cpp + llama-memory-hybrid.cpp + llama-memory-recurrent.cpp llama-mmap.cpp llama-model-loader.cpp llama-model-saver.cpp diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp index 43fa60a8070b7..df3fc5d3e74f8 100644 --- a/src/llama-arch.cpp +++ b/src/llama-arch.cpp @@ -20,6 +20,7 @@ static const std::map LLM_ARCH_NAMES = { { LLM_ARCH_BERT, "bert" }, { LLM_ARCH_NOMIC_BERT, "nomic-bert" }, { LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" }, + { LLM_ARCH_NEO_BERT, "neo-bert" }, { LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" }, { LLM_ARCH_BLOOM, "bloom" }, { LLM_ARCH_STABLELM, "stablelm" }, @@ -33,6 +34,7 @@ static const std::map LLM_ARCH_NAMES = { { LLM_ARCH_PHI3, "phi3" }, { LLM_ARCH_PHIMOE, "phimoe" }, { LLM_ARCH_PLAMO, "plamo" }, + { LLM_ARCH_PLAMO2, "plamo2" }, { LLM_ARCH_CODESHELL, "codeshell" }, { LLM_ARCH_ORION, "orion" }, { LLM_ARCH_INTERNLM2, "internlm2" }, @@ -41,8 +43,12 @@ static const std::map LLM_ARCH_NAMES = { { LLM_ARCH_GEMMA, "gemma" }, { LLM_ARCH_GEMMA2, "gemma2" }, { LLM_ARCH_GEMMA3, "gemma3" }, + { LLM_ARCH_GEMMA3N, "gemma3n" }, { LLM_ARCH_STARCODER2, "starcoder2" }, { LLM_ARCH_MAMBA, "mamba" }, + { LLM_ARCH_MAMBA2, "mamba2" }, + { LLM_ARCH_JAMBA, "jamba" }, + { LLM_ARCH_FALCON_H1, "falcon-h1" }, { LLM_ARCH_XVERSE, "xverse" }, { LLM_ARCH_COMMAND_R, "command-r" }, { LLM_ARCH_COHERE2, "cohere2" }, @@ -68,10 +74,19 @@ static const std::map LLM_ARCH_NAMES = { { LLM_ARCH_ARWKV7, "arwkv7" }, { LLM_ARCH_GRANITE, "granite" }, { LLM_ARCH_GRANITE_MOE, "granitemoe" }, + { LLM_ARCH_GRANITE_HYBRID, "granitehybrid" }, { LLM_ARCH_CHAMELEON, "chameleon" }, { LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" }, { LLM_ARCH_PLM, "plm" }, { LLM_ARCH_BAILINGMOE, "bailingmoe" }, + { LLM_ARCH_DOTS1, "dots1" }, + { LLM_ARCH_ARCEE, "arcee" }, + { LLM_ARCH_ERNIE4_5, "ernie4_5" }, + { LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" }, + { LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" }, + { LLM_ARCH_SMOLLM3, "smollm3" }, + { LLM_ARCH_LFM2, "lfm2" }, + { LLM_ARCH_DREAM, 
"dream" }, { LLM_ARCH_UNKNOWN, "(unknown)" }, }; @@ -164,6 +179,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_SSM_INNER_SIZE, "%s.ssm.inner_size" }, { LLM_KV_SSM_STATE_SIZE, "%s.ssm.state_size" }, { LLM_KV_SSM_TIME_STEP_RANK, "%s.ssm.time_step_rank" }, + { LLM_KV_SSM_GROUP_COUNT, "%s.ssm.group_count" }, { LLM_KV_SSM_DT_B_C_RMS, "%s.ssm.dt_b_c_rms" }, { LLM_KV_WKV_HEAD_SIZE, "%s.wkv.head_size" }, @@ -176,6 +192,8 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_CLASSIFIER_OUTPUT_LABELS, "%s.classifier.output_labels" }, + { LLM_KV_SHORTCONV_L_CACHE, "%s.shortconv.l_cache" }, + { LLM_KV_TOKENIZER_MODEL, "tokenizer.ggml.model" }, { LLM_KV_TOKENIZER_PRE, "tokenizer.ggml.pre" }, { LLM_KV_TOKENIZER_LIST, "tokenizer.ggml.tokens" }, @@ -194,6 +212,7 @@ static const std::map LLM_KV_NAMES = { { LLM_KV_TOKENIZER_MASK_ID, "tokenizer.ggml.mask_token_id" }, { LLM_KV_TOKENIZER_ADD_BOS, "tokenizer.ggml.add_bos_token" }, { LLM_KV_TOKENIZER_ADD_EOS, "tokenizer.ggml.add_eos_token" }, + { LLM_KV_TOKENIZER_ADD_SEP, "tokenizer.ggml.add_sep_token" }, { LLM_KV_TOKENIZER_ADD_PREFIX, "tokenizer.ggml.add_space_prefix" }, { LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, "tokenizer.ggml.remove_extra_whitespaces" }, { LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, "tokenizer.ggml.precompiled_charsmap" }, @@ -243,6 +262,24 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, }, }, + { + LLM_ARCH_ARCEE, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ROPE_FREQS, "rope_freqs" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" 
}, + }, + }, { LLM_ARCH_LLAMA4, { @@ -494,6 +531,21 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, }, }, + { + LLM_ARCH_NEO_BERT, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" }, + { LLM_TENSOR_CLS, "cls" }, + { LLM_TENSOR_CLS_OUT, "cls.output" }, + }, + }, { LLM_ARCH_JINA_BERT_V2, { @@ -735,6 +787,36 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, }, }, + { + LLM_ARCH_PLAMO2, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ROPE_FREQS, "rope_freqs" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" }, + { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" }, + { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" }, + { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" }, + { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" }, + { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" }, + { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" }, + { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" }, + { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" }, + { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" }, + { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" }, + { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" }, + { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" }, + { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" }, + }, + }, { LLM_ARCH_CODESHELL, { @@ 
-894,6 +976,42 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" }, }, }, + { + LLM_ARCH_GEMMA3N, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" }, + { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" }, + { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" }, + { LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" }, + { LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" }, + { LLM_TENSOR_ALTUP_PROJ, "altup_proj" }, + { LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" }, + { LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" }, + { LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" }, + { LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" }, + { LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" }, + { LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" }, + { LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" }, + { LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" }, + { LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" }, + { LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" }, + { LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" }, + }, + }, { LLM_ARCH_STARCODER2, { @@ -928,6 +1046,77 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" }, }, }, + { + LLM_ARCH_MAMBA2, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { 
LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" }, + { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" }, + { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" }, + { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" }, + { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" }, + { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" }, + { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" }, + }, + }, + { + LLM_ARCH_JAMBA, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" }, + { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" }, + { LLM_TENSOR_SSM_X, "blk.%d.ssm_x" }, + { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" }, + { LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" }, + { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" }, + { LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" }, + { LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" }, + { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" }, + { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" }, + { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" }, + { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, + }, + }, + { + LLM_ARCH_FALCON_H1, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { 
LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" }, + { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" }, + { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" }, + { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" }, + { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" }, + { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" }, + { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + }, + }, { LLM_ARCH_XVERSE, { @@ -1488,6 +1677,43 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" }, }, }, + { + LLM_ARCH_GRANITE_HYBRID, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + // mamba(2) ssm layers + { LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" }, + { LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" }, + { LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" }, + { LLM_TENSOR_SSM_A, "blk.%d.ssm_a" }, + { LLM_TENSOR_SSM_D, "blk.%d.ssm_d" }, + { LLM_TENSOR_SSM_NORM, "blk.%d.ssm_norm" }, + { LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" }, + // attention layers + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + // dense FFN + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + // moe FFN + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" }, + { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" }, + { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" }, + { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, + // shared expert + { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" }, + { LLM_TENSOR_FFN_DOWN_SHEXP, 
"blk.%d.ffn_down_shexp" }, + { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" }, + }, + }, { LLM_ARCH_CHAMELEON, { @@ -1555,12 +1781,160 @@ static const std::map> LLM_TENSOR_N { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" }, }, }, + { + LLM_ARCH_DOTS1, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" }, + { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" }, + { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" }, + { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, + { LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" }, + { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" }, + { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" }, + { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" }, + { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" }, + } + }, + { + LLM_ARCH_ERNIE4_5, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + }, + }, + { + 
LLM_ARCH_ERNIE4_5_MOE, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" }, + { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" }, + { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" }, + { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" }, + { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" }, + { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" }, + { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, + { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" }, + }, + }, + { + LLM_ARCH_HUNYUAN_MOE, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" }, + { LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" }, + { LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" }, + { LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" }, + { LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" }, + { LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" }, + }, + }, + { + LLM_ARCH_SMOLLM3, + { + { LLM_TENSOR_TOKEN_EMBD, 
"token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + }, + }, + { + LLM_ARCH_LFM2, + { + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" }, + { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + { LLM_TENSOR_SHORTCONV_CONV, "blk.%d.shortconv.conv" }, + { LLM_TENSOR_SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" }, + { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" }, + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" }, + } + }, { LLM_ARCH_UNKNOWN, { { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, }, }, + { + LLM_ARCH_DREAM, + { + { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, + { LLM_TENSOR_OUTPUT_NORM, "output_norm" }, + { LLM_TENSOR_OUTPUT, "output" }, + { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" }, + { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" }, + { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" }, + { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" }, + { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" }, + { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" }, + { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" }, + { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" }, + { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" }, + }, + }, }; static const std::map 
LLM_TENSOR_INFOS = { @@ -1639,7 +2013,11 @@ static const std::map LLM_TENSOR_INFOS = { {LLM_TENSOR_FFN_ACT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_DIV}}, {LLM_TENSOR_SSM_CONV1D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}}, {LLM_TENSOR_SSM_A, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_SCAN}}, + {LLM_TENSOR_SSM_DT_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, + {LLM_TENSOR_SSM_B_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, + {LLM_TENSOR_SSM_C_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, {LLM_TENSOR_SSM_D, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, + {LLM_TENSOR_SSM_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, {LLM_TENSOR_TIME_MIX_LERP_X, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, {LLM_TENSOR_TIME_MIX_LN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, {LLM_TENSOR_CHANNEL_MIX_LERP_K, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, @@ -1683,6 +2061,23 @@ static const std::map LLM_TENSOR_INFOS = { {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}}, {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}}, {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}}, + // altup / laurel (gemma 3n) + {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}}, + {LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}}, + {LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, + {LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, + {LLM_TENSOR_ALTUP_PREDICT_COEF, 
{LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, + {LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, // this tensor is loaded for T5, but never used {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}}, {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}}, @@ -1701,6 +2096,9 @@ static const std::map LLM_TENSOR_INFOS = { {LLM_TENSOR_CONVNEXT_PW1, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, {LLM_TENSOR_CONVNEXT_PW2, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, {LLM_TENSOR_CONVNEXT_GAMMA, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}}, + {LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}}, + {LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, + {LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}}, }; LLM_KV::LLM_KV(llm_arch arch, const char * suffix) : arch(arch), suffix(suffix) {} @@ -1752,3 +2150,39 @@ llm_arch llm_arch_from_string(const std::string & name) { const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor) { return LLM_TENSOR_INFOS.at(tensor); } + +bool llm_arch_is_recurrent(const llm_arch & arch) { + switch (arch) { + case LLM_ARCH_MAMBA: + case LLM_ARCH_MAMBA2: + case LLM_ARCH_RWKV6: + case LLM_ARCH_RWKV6QWEN2: + case LLM_ARCH_RWKV7: + case LLM_ARCH_ARWKV7: + return true; + default: + return false; + } +} + +bool llm_arch_is_hybrid(const llm_arch & arch) { + switch (arch) { + case LLM_ARCH_JAMBA: + case LLM_ARCH_FALCON_H1: + case LLM_ARCH_PLAMO2: + case LLM_ARCH_GRANITE_HYBRID: + case LLM_ARCH_LFM2: + return true; + default: + return false; + } +} + +bool llm_arch_is_diffusion(const llm_arch & arch) { + switch (arch) { + case 
LLM_ARCH_DREAM: + return true; + default: + return false; + } +} diff --git a/src/llama-arch.h b/src/llama-arch.h index f3825528aefdb..3bffe359eabe5 100644 --- a/src/llama-arch.h +++ b/src/llama-arch.h @@ -24,6 +24,7 @@ enum llm_arch { LLM_ARCH_BERT, LLM_ARCH_NOMIC_BERT, LLM_ARCH_NOMIC_BERT_MOE, + LLM_ARCH_NEO_BERT, LLM_ARCH_JINA_BERT_V2, LLM_ARCH_BLOOM, LLM_ARCH_STABLELM, @@ -37,6 +38,7 @@ enum llm_arch { LLM_ARCH_PHI3, LLM_ARCH_PHIMOE, LLM_ARCH_PLAMO, + LLM_ARCH_PLAMO2, LLM_ARCH_CODESHELL, LLM_ARCH_ORION, LLM_ARCH_INTERNLM2, @@ -45,8 +47,12 @@ enum llm_arch { LLM_ARCH_GEMMA, LLM_ARCH_GEMMA2, LLM_ARCH_GEMMA3, + LLM_ARCH_GEMMA3N, LLM_ARCH_STARCODER2, LLM_ARCH_MAMBA, + LLM_ARCH_MAMBA2, + LLM_ARCH_JAMBA, + LLM_ARCH_FALCON_H1, LLM_ARCH_XVERSE, LLM_ARCH_COMMAND_R, LLM_ARCH_COHERE2, @@ -72,10 +78,19 @@ enum llm_arch { LLM_ARCH_ARWKV7, LLM_ARCH_GRANITE, LLM_ARCH_GRANITE_MOE, + LLM_ARCH_GRANITE_HYBRID, LLM_ARCH_CHAMELEON, LLM_ARCH_WAVTOKENIZER_DEC, LLM_ARCH_PLM, LLM_ARCH_BAILINGMOE, + LLM_ARCH_DOTS1, + LLM_ARCH_ARCEE, + LLM_ARCH_ERNIE4_5, + LLM_ARCH_ERNIE4_5_MOE, + LLM_ARCH_HUNYUAN_MOE, + LLM_ARCH_SMOLLM3, + LLM_ARCH_LFM2, + LLM_ARCH_DREAM, LLM_ARCH_UNKNOWN, }; @@ -168,6 +183,7 @@ enum llm_kv { LLM_KV_SSM_CONV_KERNEL, LLM_KV_SSM_STATE_SIZE, LLM_KV_SSM_TIME_STEP_RANK, + LLM_KV_SSM_GROUP_COUNT, LLM_KV_SSM_DT_B_C_RMS, LLM_KV_WKV_HEAD_SIZE, @@ -190,6 +206,7 @@ enum llm_kv { LLM_KV_TOKENIZER_MASK_ID, LLM_KV_TOKENIZER_ADD_BOS, LLM_KV_TOKENIZER_ADD_EOS, + LLM_KV_TOKENIZER_ADD_SEP, LLM_KV_TOKENIZER_ADD_PREFIX, LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, @@ -214,6 +231,8 @@ enum llm_kv { LLM_KV_CLASSIFIER_OUTPUT_LABELS, + LLM_KV_SHORTCONV_L_CACHE, + // deprecated: LLM_KV_TOKENIZER_PREFIX_ID, LLM_KV_TOKENIZER_SUFFIX_ID, @@ -264,12 +283,32 @@ enum llm_tensor { LLM_TENSOR_LAYER_OUT_NORM, LLM_TENSOR_POST_ATTN_NORM, LLM_TENSOR_POST_MLP_NORM, + LLM_TENSOR_PER_LAYER_TOKEN_EMBD, // gemma3n + LLM_TENSOR_PER_LAYER_MODEL_PROJ, // gemma3n + 
LLM_TENSOR_PER_LAYER_INP_GATE, // gemma3n + LLM_TENSOR_PER_LAYER_PROJ, // gemma3n + LLM_TENSOR_PER_LAYER_PROJ_NORM, // gemma3n + LLM_TENSOR_PER_LAYER_POST_NORM, // gemma3n + LLM_TENSOR_ALTUP_PROJ, // gemma3n + LLM_TENSOR_ALTUP_UNEMBD_PROJ, // gemma3n + LLM_TENSOR_ALTUP_CORRECT_COEF, // gemma3n + LLM_TENSOR_ALTUP_CORRECT_SCALE, // gemma3n + LLM_TENSOR_ALTUP_PREDICT_COEF, // gemma3n + LLM_TENSOR_ALTUP_ROUTER, // gemma3n + LLM_TENSOR_ALTUP_ROUTER_NORM, // gemma3n + LLM_TENSOR_LAUREL_L, // gemma3n + LLM_TENSOR_LAUREL_R, // gemma3n + LLM_TENSOR_LAUREL_POST_NORM, // gemma3n LLM_TENSOR_SSM_IN, LLM_TENSOR_SSM_CONV1D, LLM_TENSOR_SSM_X, LLM_TENSOR_SSM_DT, + LLM_TENSOR_SSM_DT_NORM, LLM_TENSOR_SSM_A, + LLM_TENSOR_SSM_B_NORM, + LLM_TENSOR_SSM_C_NORM, LLM_TENSOR_SSM_D, + LLM_TENSOR_SSM_NORM, LLM_TENSOR_SSM_OUT, LLM_TENSOR_TIME_MIX_W0, LLM_TENSOR_TIME_MIX_W1, @@ -363,6 +402,9 @@ enum llm_tensor { LLM_TENSOR_POS_NET_ATTN_K, LLM_TENSOR_POS_NET_ATTN_V, LLM_TENSOR_POS_NET_ATTN_OUT, + LLM_TENSOR_SHORTCONV_CONV, + LLM_TENSOR_SHORTCONV_INPROJ, + LLM_TENSOR_SHORTCONV_OUTPROJ, }; enum llm_tensor_layer { @@ -436,3 +478,7 @@ const char * llm_arch_name(llm_arch arch); llm_arch llm_arch_from_string(const std::string & name); const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor); + +bool llm_arch_is_recurrent(const llm_arch & arch); +bool llm_arch_is_hybrid (const llm_arch & arch); +bool llm_arch_is_diffusion(const llm_arch & arch); diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp index bdbf766266f90..a546063c0a7c8 100644 --- a/src/llama-batch.cpp +++ b/src/llama-batch.cpp @@ -1,393 +1,792 @@ #include "llama-batch.h" #include "llama-impl.h" -#include "llama-cparams.h" #include "llama-vocab.h" +#include "llama-memory.h" #include #include #include #include -llama_ubatch llama_sbatch::reserve_ubatch(size_t n_ubatch, bool has_embd) { - // clear empty sequences - // the previous ubatch is assumed to be gone, - // so nothing should refer to values in these sequences anymore. 
- for (size_t i = seq.size(); i-- > 0;) { - if (seq[i].length == 0) { - seq.pop_back(); - } else { - break; - } +llama_batch_allocr::llama_batch_allocr(uint32_t n_pos_per_embd) : n_pos_per_embd(n_pos_per_embd) { + const char * LLAMA_BATCH_DEBUG = getenv("LLAMA_BATCH_DEBUG"); + debug = LLAMA_BATCH_DEBUG ? atoi(LLAMA_BATCH_DEBUG) : 0; + + seq_pos.resize(LLAMA_MAX_SEQ); + seq_cpl.resize(LLAMA_MAX_SEQ); + for (auto & cur : seq_cpl) { + cur.resize(LLAMA_MAX_SEQ); } - udatas.push_back({}); + seq_idx.resize(LLAMA_MAX_SEQ, -1); +} - auto & udata = udatas.back(); +bool llama_batch_allocr::init( + const llama_batch & batch_inp, + const llama_vocab & vocab, + const llama_memory_i * memory, + uint32_t n_embd, + uint32_t n_seq_max, + bool output_all) { + clear(); - udata.token.resize(!has_embd ? n_ubatch : 0); - udata.embd.resize(has_embd ? n_embd * n_ubatch : 0); - udata.pos.resize(n_ubatch); - udata.n_seq_id.resize(n_ubatch); - udata.seq_id.resize(n_ubatch); - udata.output.resize(n_ubatch); + batch = batch_inp; - llama_ubatch ubatch = { - /*equal_seqs =*/ true, - /*n_tokens =*/ 0, - /*n_seq_tokens =*/ 0, - /*n_seqs =*/ 0, - /*token =*/ !has_embd ? udata.token.data() : nullptr, - /*embd =*/ has_embd ? 
udata.embd.data() : nullptr, - /*pos =*/ udata.pos.data(), - /*n_seq_id =*/ udata.n_seq_id.data(), - /*seq_id =*/ udata.seq_id.data(), - /*output =*/ udata.output.data(), - }; + this->vocab = &vocab; - return ubatch; -} + GGML_ASSERT(batch.n_tokens > 0); + + // + // validate input batch + // + + if (n_seq_max > LLAMA_MAX_SEQ) { + LLAMA_LOG_ERROR("%s: n_seq_max = %d > %d\n", __func__, n_seq_max, LLAMA_MAX_SEQ); + return false; + } -void llama_sbatch::add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length) { - GGML_ASSERT(batch != nullptr); - GGML_ASSERT(length <= seq.length); - // Can only add sequences of equal lengths to a batch, - // otherwise it isn't clear to which sequence a token belongs - GGML_ASSERT(seq.n_seq_id == 0 || ubatch.n_seqs == 0 || length == (size_t) ubatch.n_tokens / ubatch.n_seqs); - GGML_ASSERT((seq.n_seq_id != 0) == ubatch.equal_seqs); - // NOTE: loops are separated for cache-friendliness - if (batch->token) { - if (ubatch.equal_seqs) { - for (size_t i = 0; i < length; ++i) { - ubatch.token[ubatch.n_tokens + i] = batch->token[ids[seq.offset + i]]; + if (batch.token) { + for (int32_t i = 0; i < batch.n_tokens; ++i) { + if (batch.token[i] < 0 || (uint32_t) batch.token[i] >= vocab.n_tokens()) { + LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch.token[i]); + return false; } - } else { - // simple split - ubatch.token = batch->token + seq.offset; } - } else { - ubatch.token = nullptr; - } - if (batch->embd) { - if (ubatch.equal_seqs) { - for (size_t i = 0; i < length; ++i) { - memcpy( - ubatch.embd + (n_embd * (ubatch.n_tokens + i)), - batch->embd + (n_embd * ids[seq.offset + i]), - n_embd * sizeof(float) - ); + } + + if (batch.seq_id) { + for (int32_t i = 0; i < batch.n_tokens; ++i) { + for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) { + if (batch.seq_id && (batch.seq_id[i][s] < 0 || batch.seq_id[i][s] >= (llama_seq_id) n_seq_max)) { + LLAMA_LOG_ERROR("%s: invalid seq_id[%d][%d] = %d > %d\n", __func__, i, 
s, batch.seq_id[i][s], (llama_seq_id) n_seq_max); + return false; + } } - } else { - // simple split - ubatch.embd = batch->embd + (n_embd * seq.offset); } - } else { - ubatch.embd = nullptr; } - if (ubatch.equal_seqs) { - for (size_t i = 0; i < length; ++i) { - ubatch.pos[ubatch.n_tokens + i] = batch->pos[ids[seq.offset + i]]; + + // + // auto-generate missing fields + // + + if (!batch.n_seq_id) { + n_seq_id.resize(batch.n_tokens); + for (int32_t i = 0; i < batch.n_tokens; i++) { + n_seq_id[i] = seq_id_0.size(); } - } else { - // simple split - ubatch.pos = batch->pos + seq.offset; + batch.n_seq_id = n_seq_id.data(); } - if (ubatch.equal_seqs) { - ubatch.n_seq_id[ubatch.n_seqs] = seq.n_seq_id; - if (seq.seq_id) { - ubatch.seq_id[ubatch.n_seqs] = seq.seq_id; + + if (!batch.seq_id) { + seq_id.resize(batch.n_tokens + 1); + seq_id[batch.n_tokens] = NULL; + for (int32_t i = 0; i < batch.n_tokens; i++) { + seq_id[i] = seq_id_0.data(); } - } else { - // simple split - if (batch->n_seq_id) { - ubatch.n_seq_id = batch->n_seq_id + seq.offset; - } else { - for (size_t i = 0; i < length; ++i) { - ubatch.n_seq_id[ubatch.n_seqs + i] = 1; + batch.seq_id = seq_id.data(); + } + + if (!batch.pos) { + pos.resize(batch.n_tokens); + + // initialize the starting position for each sequence based on the positions in the memory + llama_pos p0[LLAMA_MAX_SEQ]; + for (uint32_t s = 0; s < n_seq_max; ++s) { + if (!memory) { + // if no memory -> start from 0 + p0[s] = 0; + } else { + p0[s] = memory->seq_pos_max(s) + 1; } } - if (batch->seq_id) { - ubatch.seq_id = batch->seq_id + seq.offset; + + for (int32_t i = 0; i < batch.n_tokens; i++) { + const llama_seq_id seq_id = batch.seq_id[i][0]; + + pos[i] = p0[seq_id]; + + // update the starting position for all sequences that are assigned to the this token + for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) { + const llama_seq_id seq_id = batch.seq_id[i][s]; + + p0[seq_id] = pos[i] + 1; + } } + + batch.pos = pos.data(); } - if (batch->logits) { - if 
(ubatch.equal_seqs) { - for (size_t i = 0; i < length; ++i) { - size_t id = ids[seq.offset + i]; - int8_t is_output = batch->logits[id]; - ubatch.output[ubatch.n_tokens + i] = is_output; - if (is_output) { out_ids.push_back(id); } - } + + if (!batch.logits) { + if (output_all) { + // return the output for all tokens + output.resize(batch.n_tokens, true); } else { - // simple split - ubatch.output = batch->logits + seq.offset; - for (size_t i = 0; i < length; ++i) { - if (ubatch.output[i] != 0) { out_ids.push_back(seq.offset + i); } - } + // return the output only for the last token + output.resize(batch.n_tokens, false); + output[output.size() - 1] = true; } - } else { - // only get last output - for (size_t i = 0; i < length; ++i) { - size_t id = ids[seq.offset + i]; - int8_t is_last = id == ids.size() - 1; - ubatch.output[ubatch.n_tokens + i] = is_last; - if (is_last) { out_ids.push_back(id); } - } - } - if (ubatch.n_tokens == 0 && ubatch.n_seqs == 0) { - ubatch.n_seq_tokens = ubatch.equal_seqs ? length : 1; - } - ubatch.n_tokens += length; - ubatch.n_seqs += ubatch.equal_seqs ? 1 : length; // virtual sequences for simple splits - seq.offset += length; - seq.length -= length; - n_tokens -= length; - GGML_ASSERT(ubatch.n_tokens == ubatch.n_seq_tokens * ubatch.n_seqs); -} -llama_ubatch llama_sbatch::split_simple(size_t n_ubatch) { - n_ubatch = n_tokens < n_ubatch ? n_tokens : n_ubatch; - llama_ubatch ubatch = reserve_ubatch(n_ubatch, /* has_embd */ batch->embd != nullptr); - ubatch.equal_seqs = false; - if (!seq.empty()) { - llama_sbatch_seq & s = seq[0]; - size_t length = s.length < n_ubatch ? s.length : n_ubatch; - GGML_ASSERT(seq.size() == 1 && s.n_seq_id == 0); // don't mix with other splits - add_seq_to_ubatch(ubatch, s, length); - } - return ubatch; -} + batch.logits = output.data(); + } else if (output_all) { + bool warn = false; -llama_ubatch llama_sbatch::split_equal(size_t n_ubatch) { - n_ubatch = n_tokens < n_ubatch ? 
n_tokens : n_ubatch; - llama_ubatch ubatch = reserve_ubatch(n_ubatch, /* has_embd */ batch->embd != nullptr); - if (!seq.empty()) { - size_t length = 0; - size_t n_tokens_in_ubatch = 0; - GGML_ASSERT(seq[0].n_seq_id > 0); // should not be mixed with simple splits - // smallest first, because it's easier to split this way; - // starting from the end to pop in constant time. - for (size_t i = seq.size(); i-- > 0;) { - llama_sbatch_seq & s = seq[i]; - GGML_ASSERT(s.length > 0); - if (length == 0) { - length = s.length < n_ubatch ? s.length : n_ubatch; + for (int32_t i = 0; i < batch.n_tokens; ++i) { + if (batch.logits[i] == 0) { + warn = true; } - add_seq_to_ubatch(ubatch, s, length); - n_tokens_in_ubatch += length; - // shared prompts can't be mixed with any of their sequences, - // so it's safer to compute them in their own ubatch - if (s.n_seq_id > 1) { break; } - // stop when there isn't enough space for another sequence - if (length + n_tokens_in_ubatch > n_ubatch) { break; } + } + + if (warn) { + LLAMA_LOG_WARN("%s: embeddings required but some input tokens were not marked as outputs -> overriding\n", __func__); + + output.resize(batch.n_tokens, true); + batch.logits = output.data(); } } - return ubatch; -} -llama_ubatch llama_sbatch::split_seq(size_t n_ubatch) { - n_ubatch = n_tokens < n_ubatch ? n_tokens : n_ubatch; - llama_ubatch ubatch = reserve_ubatch(n_ubatch, /* has_embd */ batch->embd != nullptr); - if (!seq.empty()) { - llama_sbatch_seq & s = seq[seq.size() - 1]; - size_t length = s.length < n_ubatch ? 
s.length : n_ubatch; - GGML_ASSERT(s.n_seq_id > 0); // should not be mixed with simple splits - add_seq_to_ubatch(ubatch, s, length); + // + // compute stats + // + + this->n_embd = n_embd; + this->n_seq_max = n_seq_max; + + // count the outputs in this batch + for (int32_t i = 0; i < batch.n_tokens; ++i) { + n_outputs += batch.logits[i] != 0; } - return ubatch; -} -llama_sbatch::llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple_split) { - GGML_ASSERT(batch.n_tokens >= 0); - this->batch = &batch; - this->n_embd = n_embd; + has_cpl = false; - n_tokens = batch.n_tokens; - ids.resize(n_tokens); - out_ids.clear(); - // TODO: reserve out_ids and seq - - for (size_t i = 0; i < n_tokens; ++i) { - ids[i] = i; - } - - if (simple_split) { - seq.resize(1); - llama_sbatch_seq & s = seq[0]; - s.n_seq_id = 0; - s.seq_id = nullptr; - s.offset = 0; - s.length = n_tokens; - return; - } - - std::sort(ids.begin(), ids.end(), - [&batch](size_t a, size_t b) { - int32_t n_seq_a = batch.n_seq_id ? batch.n_seq_id[a] : 1; - int32_t n_seq_b = batch.n_seq_id ? 
batch.n_seq_id[b] : 1; - // sort by seq_id, then by pos - if (n_seq_a == n_seq_b) { - if (batch.seq_id) { - for (int32_t i = 0; i < n_seq_a; ++i) { - llama_seq_id seq_id_a = batch.seq_id[a][i]; - llama_seq_id seq_id_b = batch.seq_id[b][i]; - // smaller seq_ids go first - if (seq_id_a != seq_id_b) { - return seq_id_a < seq_id_b; - } - } - } - // when all else is equal, sort by pos - if (batch.pos) { - return batch.pos[a] < batch.pos[b]; - } - // no pos, sort by id - return a < b; - } - // shared prompts go first - return n_seq_a > n_seq_b; + // determine coupled sequences + // these are pairs of sequences that have at least one token in the input batch that is assigned to both of them + for (int32_t i = 0; i < batch.n_tokens; ++i) { + const llama_seq_id s0 = batch.seq_id[i][0]; + + for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) { + const llama_seq_id s1 = batch.seq_id[i][s]; + + seq_pos[s1].insert(batch.pos[i]); + + if (s > 0) { + // mark that sequence s1 is coupled to s0 + seq_cpl[s1][s0] = true; + + // note: tracking the other way around is not necessary for now + //seq_cpl[s0][s1] = true; + + has_cpl = true; } - ); - - // init seq - llama_sbatch_seq * last_seq = nullptr; - - for (size_t i = 0; i < n_tokens; ++i) { - const size_t bi = ids[i]; - const int32_t n_seqs = batch.n_seq_id[bi]; - llama_seq_id * seq_ids = batch.seq_id[bi]; - if (last_seq != nullptr) { - bool same = n_seqs == last_seq->n_seq_id; - for (int32_t j = 0; same && j < n_seqs; ++j) { - if (seq_ids[j] != last_seq->seq_id[j]) { - same = false; - } + } + } + + // precompute the sequence sets for each token and determine the unique sequence ids that participate in the batch + { + seq_set_t seq_set_unq; + + for (int32_t i = 0; i < batch.n_tokens; ++i) { + seq_set_t cur; + for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) { + const llama_seq_id seq_id = batch.seq_id[i][s]; + + cur .set(seq_id); + seq_set_unq.set(seq_id); } - if (same) { - last_seq->length += 1; - continue; + + seq_set.push_back(cur); + 
seq_set_map[cur].push_back(i); + } + + for (uint32_t s = 0; s < n_seq_max; ++s) { + if (seq_set_unq.test(s)) { + seq_idx[s] = seq_id_unq.size(); + seq_id_unq.push_back(s); } } - llama_sbatch_seq new_seq = {n_seqs, seq_ids, i, 1}; - seq.push_back(new_seq); - last_seq = &seq.back(); } - // keep shared prompts first at the end, then sort by length descending. - std::sort(seq.begin(), seq.end(), - [](llama_sbatch_seq & a, llama_sbatch_seq & b) { - if (a.n_seq_id == b.n_seq_id) { - return a.length > b.length; + if (debug > 0) { + LLAMA_LOG_DEBUG("%s: input batch info:\n", __func__); + + llama_ubatch ubatch { + /*.b_equal_seqs =*/ false, + /*.n_tokens =*/ (uint32_t) batch.n_tokens, + /*.n_seq_tokens =*/ (uint32_t) 1, + /*.n_seqs =*/ (uint32_t) batch.n_tokens, + /*.n_seqs_unq =*/ (uint32_t) this->seq_id_unq.size(), + /*.token =*/ batch.token, + /*.embd =*/ batch.embd, + /*.pos =*/ batch.pos, + /*.n_seq_id =*/ batch.n_seq_id, + /*.seq_id =*/ batch.seq_id, + /*.seq_id_unq =*/ this->seq_id_unq.data(), + /*.seq_idx =*/ this->seq_idx.data(), + /*.output =*/ batch.logits, + /*.data =*/ {}, + }; + + ubatch_print(ubatch, debug); + + LLAMA_LOG_DEBUG("%s: seq = [\n", __func__); + for (int s0 = 0; s0 < (int) seq_pos.size(); ++s0) { + if (seq_pos[s0].empty()) { + continue; + } + + std::stringstream ss; + for (int s1 = 0; s1 < (int) seq_cpl[s0].size(); ++s1) { + if (seq_cpl[s0][s1]) { + ss << s1 << " "; } - return a.n_seq_id < b.n_seq_id; } - ); -} -llama_batch_allocr::llama_batch_allocr() { - const char * LLAMA_BATCH_DEBUG = getenv("LLAMA_BATCH_DEBUG"); - debug = LLAMA_BATCH_DEBUG ? atoi(LLAMA_BATCH_DEBUG) : 0; -} + LLAMA_LOG_DEBUG("%s: %4d: pos = [%4d, %4d], cpl = %s\n", + __func__, s0, seq_pos_min(s0), seq_pos_max(s0), ss.str().empty() ? 
"-" : ss.str().c_str()); + } + LLAMA_LOG_DEBUG("%s: ]\n", __func__); + } -bool llama_batch_allocr::init(const llama_batch & batch_inp, const llama_vocab & vocab, llama_pos p0) { - clear(); + // + // consistency checks + // - batch = batch_inp; + for (uint32_t s = 0; s < n_seq_max; ++s) { + if (seq_pos[s].empty()) { + continue; + } - GGML_ASSERT(batch.n_tokens > 0); + const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1; - if (!batch.pos) { - if (batch.seq_id) { - LLAMA_LOG_ERROR("%s: pos == NULL, but seq_id != NULL\n", __func__); + if (p0 >= 0) { + bool ok = true; + + if (batch.token) { + if (seq_pos_min(s) != p0 + 1) { + ok = false; + } + } else { + assert(batch.embd); + + // for embeddings (typically used as vision input), we allow them to have repeating positions + // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762 + if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) { + ok = false; + } + } + + if (!ok) { + LLAMA_LOG_ERROR( + "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n" + " - the last position stored in the memory module of the context (i.e. 
the KV cache) for sequence %d is X = %d\n" + " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n" + " it is required that the sequence positions remain consecutive: Y = X + 1\n", + __func__, s, s, p0, s, seq_pos_min(s)); + + return false; + } + } + + if (seq_pos_max(s) - seq_pos_min(s) + 1 > (int) seq_pos[s].size()) { + LLAMA_LOG_ERROR("%s: sequence %d positions are not continuous\n", __func__, s); return false; } } - if (batch.token) { - for (int32_t i = 0; i < batch.n_tokens; ++i) { - if (batch.token[i] < 0 || (uint32_t) batch.token[i] >= vocab.n_tokens()) { - LLAMA_LOG_ERROR("%s: invalid token[%d] = %d\n", __func__, i, batch.token[i]); - return false; + if (memory) { + for (uint32_t s0 = 0; s0 < n_seq_max; ++s0) { + for (uint32_t s1 = 0; s1 < n_seq_max; ++s1) { + if (seq_cpl[s0][s1]) { + if (memory->seq_pos_min(s0) != memory->seq_pos_min(s1) || + memory->seq_pos_max(s0) != memory->seq_pos_max(s1)) { + LLAMA_LOG_ERROR("%s: sequence %d is coupled to %d in the input batch, but have divereged\n", __func__, s0, s1); + return false; + } + } } } } - if (batch.seq_id) { + // disallow partial sequence sub-sets: + // + // invalid: x + // i: 0 1 2 ... + // --------------------------------------- + // seq_id[i][0]: 0 0 1 + // seq_id[i][1]: 1 1 2 + // seq_id[i][2]: 2 + // + // disallow decreasing sequence positions: + // + // invalid: x + // i: 0 1 2 3 4 5 6 ... 
+ // --------------------------------------- + // pos[i]: 4 5 0 1 6 2 3 + // seq_id[i][0]: 0 0 1 1 0 1 0 + // + { + seq_set_t cur_seq_set[LLAMA_MAX_SEQ]; + for (uint32_t s = 0; s < n_seq_max; ++s) { + cur_seq_set[s].set(); + } + + llama_pos cur_seq_pos[LLAMA_MAX_SEQ]; + for (uint32_t s = 0; s < n_seq_max; ++s) { + cur_seq_pos[s] = -1; + } + for (int32_t i = 0; i < batch.n_tokens; ++i) { + const llama_pos pos = batch.pos[i]; + for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) { - if (batch.seq_id && (batch.seq_id[i][s] < 0 || batch.seq_id[i][s] >= LLAMA_MAX_PARALLEL_SEQUENCES)) { - LLAMA_LOG_ERROR("%s: invalid seq_id[%d][%d] = %d > %d\n", __func__, i, s, batch.seq_id[i][s], LLAMA_MAX_PARALLEL_SEQUENCES); + const llama_seq_id seq_id = batch.seq_id[i][s]; + + cur_seq_set[seq_id] &= seq_set[i]; + + if (cur_seq_set[seq_id].none()) { + LLAMA_LOG_ERROR("%s: sequence %d belongs to incompatible sequence sets (not allowed)\n", __func__, seq_id); + return false; + } + + if (pos < cur_seq_pos[seq_id]) { + LLAMA_LOG_ERROR("%s: sequence %d positions are decreasing (not allowed)\n", __func__, seq_id); return false; } } } } - if (!batch.pos) { - assert(p0 >= 0); - pos.resize(batch.n_tokens); - for (int32_t i = 0; i < batch.n_tokens; i++) { - pos[i] = p0 + i; + split_reset(); + + return true; +} + +llama_ubatch llama_batch_allocr::ubatch_reserve(uint32_t n_seq_tokens, uint32_t n_seqs) { + const uint32_t n_tokens = n_seq_tokens*n_seqs; + + clear(); + split_reset(); + + auto udata = std::make_shared(); + + udata->token .resize(n_tokens); + udata->embd .clear(); + udata->pos .resize(n_tokens); + udata->n_seq_id .resize(n_tokens); + udata->seq_id .resize(n_tokens); + udata->seq_id_unq.resize(0); + udata->seq_idx .resize(LLAMA_MAX_SEQ, -1); + udata->output .resize(n_tokens); + + for (uint32_t s = 0; s < n_seqs; ++s) { + udata->seq_idx[s] = s; + udata->seq_id_unq.push_back(s); + } + + llama_ubatch res { + /*.b_equal_seqs =*/ true, + /*.n_tokens =*/ n_tokens, + /*.n_seq_tokens =*/ 
n_seq_tokens, + /*.n_seqs =*/ n_seqs, + /*.n_seqs_unq =*/ n_seqs, + + /*.token =*/ udata->token.data(), + /*.embd =*/ nullptr, + /*.pos =*/ udata->pos.data(), + /*.n_seq_id =*/ udata->n_seq_id.data(), + /*.seq_id =*/ udata->seq_id.data(), + /*.seq_id_unq =*/ udata->seq_id_unq.data(), + /*.seq_idx =*/ udata->seq_idx.data(), + /*.output =*/ udata->output.data(), + /*.data =*/ std::move(udata), + }; + + return res; +} + +const llama_batch & llama_batch_allocr::get_batch() const { + return batch; +} + +uint32_t llama_batch_allocr::get_n_tokens() const { + return batch.n_tokens; +} + +uint32_t llama_batch_allocr::get_n_outputs() const { + return n_outputs; +} + +uint32_t llama_batch_allocr::get_n_used() const { + return n_used; +} + +std::vector & llama_batch_allocr::get_out_ids() { + return out_ids; +} + +llama_pos llama_batch_allocr::seq_pos_min(llama_seq_id seq_id) const { + return seq_pos[seq_id].empty() ? -1 : *seq_pos[seq_id].begin(); +} + +llama_pos llama_batch_allocr::seq_pos_max(llama_seq_id seq_id) const { + return seq_pos[seq_id].empty() ? 
-1 : *seq_pos[seq_id].rbegin(); +} + +void llama_batch_allocr::split_reset() { + out_ids.clear(); + + n_used = 0; + + used.clear(); + used.resize(get_n_tokens(), false); +} + +llama_ubatch llama_batch_allocr::split_simple(uint32_t n_ubatch) { + // find the first unused token + uint32_t cur_idx = 0; + while (cur_idx < used.size() && used[cur_idx]) { + ++cur_idx; + } + + // we are done + if (cur_idx >= used.size()) { + return {}; + } + + std::vector idxs; + + while (true) { + idxs.push_back(cur_idx); + + used[cur_idx] = true; + ++n_used; + + ++cur_idx; + + if (cur_idx >= used.size()) { + break; + } + + if (idxs.size() >= n_ubatch) { + break; } - batch.pos = pos.data(); } - if (!batch.n_seq_id) { - n_seq_id.resize(batch.n_tokens); - for (int32_t i = 0; i < batch.n_tokens; i++) { - n_seq_id[i] = seq_id_0.size(); + return ubatch_add(idxs, idxs.size(), false); +} + +llama_ubatch llama_batch_allocr::split_equal(uint32_t n_ubatch, bool sequential) { + if (sequential && has_cpl) { + LLAMA_LOG_ERROR("%s: sequential split is not supported when there are coupled sequences in the input batch\n", __func__); + + return {}; + } + + std::vector cur_seq_set; + + llama_seq_id last_seq_id = -1; + + // determine the non-overlapping sequence sets participating in this ubatch + for (int32_t i = 0; i < batch.n_tokens; ++i) { + if (used[i]) { + continue; + } + + bool add = true; + + for (uint32_t s = 0; s < cur_seq_set.size(); ++s) { + // no overlap with existing sequence sets: + if (!(cur_seq_set[s] & seq_set[i]).none()) { + add = false; + break; + } + } + + // accept only increasing sequence ids + if (sequential) { + add = add && (cur_seq_set.empty() || batch.seq_id[i][0] == last_seq_id + 1); + } + + if (add) { + cur_seq_set.push_back(seq_set[i]); + + last_seq_id = batch.seq_id[i][0]; + + if (cur_seq_set.size() > n_ubatch) { + break; + } } - batch.n_seq_id = n_seq_id.data(); } - if (!batch.seq_id) { - seq_id.resize(batch.n_tokens + 1); - seq_id[batch.n_tokens] = NULL; - for (int32_t i = 
0; i < batch.n_tokens; i++) { - seq_id[i] = seq_id_0.data(); + const uint32_t n_seqs = cur_seq_set.size(); + + // we are done + if (n_seqs == 0) { + return {}; + } + + // the current batch index of each sequence set + std::vector cur_idx(n_seqs, 0); + + for (uint32_t s = 0; s < n_seqs; ++s) { + while (used[seq_set_map[cur_seq_set[s]][cur_idx[s]]]) { + ++cur_idx[s]; } - batch.seq_id = seq_id.data(); } - if (!batch.logits) { - // by default return the output only for the last token - output.resize(batch.n_tokens); - output[output.size() - 1] = true; - batch.logits = output.data(); + // the list of batch indices for each sequence set + // at the end we will concat these to get the final ubatch + std::vector idxs_per_seq(n_seqs); + + while (true) { + // we can only add new n_seq_tokens tokens if all the sequence sets have at least one more unused token and + // if we haven't reached n_ubatch + bool can_expand = true; + + for (uint32_t s = 0; s < n_seqs; ++s) { + if (cur_idx[s] >= (int32_t) seq_set_map[cur_seq_set[s]].size()) { + can_expand = false; + break; + } + } + + if (!can_expand) { + break; + } + + for (uint32_t s = 0; s < n_seqs; ++s) { + const int32_t idx = seq_set_map[cur_seq_set[s]][cur_idx[s]]; + + idxs_per_seq[s].push_back(idx); + + used[idx] = true; + ++n_used; + + ++cur_idx[s]; + } + + if ((idxs_per_seq[0].size() + 1)*n_seqs > n_ubatch) { + break; + } } - for (int32_t i = 0; i < batch.n_tokens; ++i) { - n_outputs += batch.logits[i] != 0; + // concat the per-sequence-set lists + std::vector idxs; + + for (uint32_t s = 0; s < n_seqs; ++s) { + idxs.insert(idxs.end(), idxs_per_seq[s].begin(), idxs_per_seq[s].end()); + } + + return ubatch_add(idxs, n_seqs, true); +} + +llama_ubatch llama_batch_allocr::split_seq(uint32_t n_ubatch) { + // find the first unused token + uint32_t cur_idx = 0; + while (cur_idx < used.size() && used[cur_idx]) { + ++cur_idx; + } + + // we are done + if (cur_idx >= used.size()) { + return {}; } + // this is the starting sequence set + 
// we allow adding tokens only if their sequence set is a subset of the current sequence set + auto cur_seq_set = seq_set[cur_idx]; + + std::vector idxs; + + while (true) { + idxs.push_back(cur_idx); + + used[cur_idx] = true; + ++n_used; + + if (idxs.size() >= n_ubatch) { + break; + } + + do { + ++cur_idx; + } while (cur_idx < get_n_tokens() && (used[cur_idx] || ((cur_seq_set & seq_set[cur_idx]) != seq_set[cur_idx]))); + + if (cur_idx == get_n_tokens()) { + break; + } + + cur_seq_set = seq_set[cur_idx]; + } + + return ubatch_add(idxs, 1, true); +} + +void llama_batch_allocr::clear() { + n_outputs = 0; + + batch = {}; + + pos .clear(); + n_seq_id .clear(); + seq_id .clear(); + seq_id_unq.clear(); + output .clear(); + + for (auto & cur : seq_pos) { + cur.clear(); + } + + for (auto & cur : seq_cpl) { + std::fill(cur.begin(), cur.end(), false); + } + + seq_set.clear(); + + seq_set_map.clear(); + + std::fill(seq_idx.begin(), seq_idx.end(), -1); +} + +llama_ubatch llama_batch_allocr::ubatch_add(const std::vector & idxs, uint32_t n_seqs, bool equal_seqs) { + const uint32_t n_tokens = idxs.size(); + + assert(n_tokens%n_seqs == 0); + + auto udata = std::make_shared(); + + const int32_t n_pos_cur = batch.embd ? n_pos_per_embd : 1; + + const int64_t n_embd_all = batch.embd ? 
(int64_t) n_tokens*n_embd : 0; + const int64_t n_pos_all = (int64_t) n_tokens*n_pos_cur; + + udata->token .resize(n_tokens); + udata->embd .resize(n_embd_all); + udata->pos .resize(n_pos_all); + udata->n_seq_id .resize(n_tokens); + udata->seq_id .resize(n_tokens); + udata->seq_id_unq.resize(0); + udata->seq_idx .resize(LLAMA_MAX_SEQ, -1); + udata->output .resize(n_tokens); + + seq_set_t seq_set_unq; + + for (size_t i = 0; i < idxs.size(); ++i) { + if (batch.token) { + udata->token[i] = batch.token[idxs[i]]; + } + + if (batch.embd) { + memcpy(udata->embd.data() + i*n_embd, batch.embd + (int64_t) idxs[i]*n_embd, n_embd*sizeof(float)); + } + + for (int j = 0; j < n_pos_cur; ++j) { + udata->pos[j*n_tokens + i] = batch.pos[j*batch.n_tokens + idxs[i]]; + } + + udata->n_seq_id[i] = batch.n_seq_id[idxs[i]]; + udata->seq_id[i] = batch.seq_id[idxs[i]]; + udata->output[i] = batch.logits[idxs[i]]; + + for (int s = 0; s < udata->n_seq_id[i]; ++s) { + seq_set_unq.set(udata->seq_id[i][s]); + } + + if (udata->output[i]) { + out_ids.push_back(idxs[i]); + } + } + + for (uint32_t s = 0; s < n_seq_max; ++s) { + if (seq_set_unq.test(s)) { + udata->seq_idx[s] = udata->seq_id_unq.size(); + udata->seq_id_unq.push_back(s); + } + } + + llama_ubatch res { + /*.b_equal_seqs =*/ equal_seqs, + /*.n_tokens =*/ n_tokens, + /*.n_seq_tokens =*/ n_tokens/n_seqs, + /*.n_seqs =*/ n_seqs, + /*.n_seqs_unq =*/ (uint32_t) udata->seq_id_unq.size(), + + /*.token =*/ batch.token ? udata->token.data() : nullptr, + /*.embd =*/ batch.embd ? 
udata->embd.data() : nullptr, + /*.pos =*/ udata->pos.data(), + /*.n_seq_id =*/ udata->n_seq_id.data(), + /*.seq_id =*/ udata->seq_id.data(), + /*.seq_id_unq =*/ udata->seq_id_unq.data(), + /*.seq_idx =*/ udata->seq_idx.data(), + /*.output =*/ udata->output.data(), + /*.data =*/ std::move(udata), + }; + if (debug > 0) { - LLAMA_LOG_DEBUG("%s: input batch info (p0 = %d):\n", __func__, p0); - LLAMA_LOG_DEBUG("%s: n_tokens = %d\n", __func__, batch.n_tokens); - LLAMA_LOG_DEBUG("%s: token = %p\n", __func__, (void *) batch.token); - LLAMA_LOG_DEBUG("%s: embd = %p\n", __func__, (void *) batch.embd); - LLAMA_LOG_DEBUG("%s: pos = %p\n", __func__, (void *) batch.pos); - LLAMA_LOG_DEBUG("%s: n_seq_id = %p\n", __func__, (void *) batch.n_seq_id); - LLAMA_LOG_DEBUG("%s: seq_id = %p\n", __func__, (void *) batch.seq_id); - LLAMA_LOG_DEBUG("%s: logits = %p\n", __func__, (void *) batch.logits); - LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs); + LLAMA_LOG_DEBUG("%s: added ubatch to split:\n", __func__); + + ubatch_print(res, debug); + } + + return res; +} + +void llama_batch_allocr::ubatch_print(const llama_ubatch & ubatch, int debug) { + if (debug > 0) { + LLAMA_LOG_DEBUG("%s: equal_seqs = %d\n", __func__, ubatch.equal_seqs()); + LLAMA_LOG_DEBUG("%s: n_tokens = %d\n", __func__, ubatch.n_tokens); + LLAMA_LOG_DEBUG("%s: n_seq_tokens = %d\n", __func__, ubatch.n_seq_tokens); + LLAMA_LOG_DEBUG("%s: n_seqs = %d\n", __func__, ubatch.n_seqs); + LLAMA_LOG_DEBUG("%s: n_seqs_unq = %d\n", __func__, ubatch.n_seqs_unq); + + std::stringstream ss_seq_id_unq; + std::stringstream ss_seq_idx; + + ss_seq_id_unq << "[ "; + ss_seq_idx << "["; + + for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) { + ss_seq_id_unq << ubatch.seq_id_unq[s] << " "; + } + + for (uint32_t s = 0; s < LLAMA_MAX_SEQ; ++s) { + if (ubatch.seq_idx[s] >= 0) { + ss_seq_idx << ubatch.seq_idx[s]%10; + } else { + ss_seq_idx << "."; + } + } + + ss_seq_id_unq << "]"; + ss_seq_idx << "]"; + + LLAMA_LOG_DEBUG("%s: token = 
%p\n", __func__, (void *) ubatch.token); + LLAMA_LOG_DEBUG("%s: embd = %p\n", __func__, (void *) ubatch.embd); + LLAMA_LOG_DEBUG("%s: pos = %p\n", __func__, (void *) ubatch.pos); + LLAMA_LOG_DEBUG("%s: n_seq_id = %p\n", __func__, (void *) ubatch.n_seq_id); + LLAMA_LOG_DEBUG("%s: seq_id = %p\n", __func__, (void *) ubatch.seq_id); + LLAMA_LOG_DEBUG("%s: seq_id_unq = %s\n", __func__, ss_seq_id_unq.str().c_str()); + LLAMA_LOG_DEBUG("%s: seq_idx = %s\n", __func__, ss_seq_idx.str().c_str()); + LLAMA_LOG_DEBUG("%s: output = %p\n", __func__, (void *) ubatch.output); + LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs); if (debug > 1) { int seq_id_max = 0; - for (int32_t i = 0; i < batch.n_tokens; ++i) { - for (int s = 0; s < batch.n_seq_id[i]; ++s) { - for (int s = 0; s < batch.n_seq_id[i]; ++s) { - seq_id_max = std::max(seq_id_max, batch.seq_id[i][s]); + for (uint32_t i = 0; i < ubatch.n_tokens; ++i) { + for (int s = 0; s < ubatch.n_seq_id[i]; ++s) { + for (int s = 0; s < ubatch.n_seq_id[i]; ++s) { + seq_id_max = std::max(seq_id_max, ubatch.seq_id[i][s]); } } } ++seq_id_max; LLAMA_LOG_DEBUG("%s: token = [\n", __func__); - for (int32_t i = 0; i < batch.n_tokens; ++i) { + for (uint32_t i = 0; i < ubatch.n_tokens; ++i) { std::vector seq_id(seq_id_max); - for (int s = 0; s < batch.n_seq_id[i]; ++s) { - seq_id[batch.seq_id[i][s]] = 1; + for (int s = 0; s < ubatch.n_seq_id[i]; ++s) { + seq_id[ubatch.seq_id[i][s]] = 1; } std::stringstream ss; @@ -399,33 +798,18 @@ bool llama_batch_allocr::init(const llama_batch & batch_inp, const llama_vocab & } } - LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%16s), pos = %4d, n_seq_id = %2d, seq_id = [%s], output = %d\n", - __func__, i, batch.token[i], vocab.token_to_piece(batch.token[i]).c_str(), - batch.pos[i], batch.n_seq_id[i], ss.str().c_str(), batch.logits[i]); + if (ubatch.token) { + LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%16s), pos = %4d, n_seq_id = %2d, seq_id = [%s], output = %d\n", + __func__, i, ubatch.token[i], 
vocab->token_to_piece(ubatch.token[i]).c_str(), + ubatch.pos[i], ubatch.n_seq_id[i], ss.str().c_str(), ubatch.output[i]); + } else { + LLAMA_LOG_DEBUG("%s: %4d: [embd], pos = %4d, n_seq_id = %2d, seq_id = [%s], output = %d\n", + __func__, i, ubatch.pos[i], ubatch.n_seq_id[i], ss.str().c_str(), ubatch.output[i]); + } } LLAMA_LOG_DEBUG("%s: ]\n", __func__); } } - - return true; -} - -const llama_batch & llama_batch_allocr::get_batch() const { - return batch; -} - -uint32_t llama_batch_allocr::get_n_outputs() const { - return n_outputs; -} - -void llama_batch_allocr::clear() { - n_outputs = 0; - - batch = {}; - pos.clear(); - n_seq_id.clear(); - seq_id.clear(); - output.clear(); } // @@ -436,25 +820,25 @@ struct llama_batch llama_batch_get_one( llama_token * tokens, int32_t n_tokens) { return { - /*n_tokens =*/ n_tokens, - /*tokens =*/ tokens, - /*embd =*/ nullptr, - /*pos =*/ nullptr, - /*n_seq_id =*/ nullptr, - /*seq_id =*/ nullptr, - /*logits =*/ nullptr, + /*n_tokens =*/ n_tokens, + /*tokens =*/ tokens, + /*embd =*/ nullptr, + /*pos =*/ nullptr, + /*n_seq_id =*/ nullptr, + /*seq_id =*/ nullptr, + /*logits =*/ nullptr, }; } struct llama_batch llama_batch_init(int32_t n_tokens_alloc, int32_t embd, int32_t n_seq_max) { llama_batch batch = { - /*n_tokens =*/ 0, - /*tokens =*/ nullptr, - /*embd =*/ nullptr, - /*pos =*/ nullptr, - /*n_seq_id =*/ nullptr, - /*seq_id =*/ nullptr, - /*logits =*/ nullptr, + /*n_tokens =*/ 0, + /*tokens =*/ nullptr, + /*embd =*/ nullptr, + /*pos =*/ nullptr, + /*n_seq_id =*/ nullptr, + /*seq_id =*/ nullptr, + /*logits =*/ nullptr, }; if (embd) { diff --git a/src/llama-batch.h b/src/llama-batch.h index 1e0be8ac2c6ce..d563adc66aaf5 100644 --- a/src/llama-batch.h +++ b/src/llama-batch.h @@ -2,98 +2,125 @@ #include "llama.h" +#include "llama-cparams.h" + #include #include +#include +#include +#include +#include -// very similar to llama_batch, -// but has more metadata about sequences +// keep this struct lightweight struct llama_ubatch { - bool 
equal_seqs; + bool equal_seqs() const { + return b_equal_seqs != 0; + } + + uint32_t b_equal_seqs; // note: this is a boolean, but we use an int32_t for alignment + // otherwise address sanitizer complains // TODO: whole_seqs for embeddings? uint32_t n_tokens; // total tokens (n_seq_tokens * n_seqs) - uint32_t n_seq_tokens; // tokens per sequence - uint32_t n_seqs; - - llama_token * token; // [n_tokens] - float * embd; // [n_embd, n_tokens] - llama_pos * pos; // [n_tokens] - int32_t * n_seq_id; // [n_seqs] - llama_seq_id ** seq_id; // [n_seqs] - int8_t * output; // [n_tokens] -}; - -struct llama_sbatch_seq { - int32_t n_seq_id; - - llama_seq_id * seq_id; - - size_t offset; - size_t length; -}; - -// sequence-length-aware batch splitting -struct llama_sbatch { - // tokens left in this batch - size_t n_tokens; - - size_t n_embd; - - // sorted indices into the batch - std::vector ids; - // batch indices of the output - std::vector out_ids; - std::vector seq; - - const llama_batch * batch = nullptr; - - // buffers for the ubatches - // TODO: very hacky, this needs a complete rework - struct ubatch_data { + uint32_t n_seq_tokens; // tokens per sequence set + uint32_t n_seqs; // sequence sets in the ubatch + uint32_t n_seqs_unq; // unique sequence ids in the ubatch + + // seq_id_unq: unique sequence ids in the ubatch + // seq_idx: indices of the unique sequence ids in the ubatch in [0, n_seqs_unq) + // used for extracting sequence pooled embeddings + + // // size | idx | val + llama_token * token; // [n_tokens] | i | id, token + float * embd; // [n_embd, n_tokens] | i | embd + llama_pos * pos; // [n_tokens] | i | pos + int32_t * n_seq_id; // [n_tokens] | i | - + llama_seq_id ** seq_id; // [n_tokens] | s | s0, s1, seq_id + llama_seq_id * seq_id_unq; // [n_seqs_unq] | s | seq_id + int32_t * seq_idx; // [LLAMA_MAX_SEQ] | - | seq_idx + int8_t * output; // [n_tokens] | i | - + + struct data_t { std::vector token; std::vector embd; std::vector pos; std::vector n_seq_id; 
std::vector seq_id; + std::vector seq_id_unq; + std::vector seq_idx; std::vector output; }; - std::vector udatas; + // the llama_ubatch pointers above point to this data if set. otherwise - points to non-owning data + std::shared_ptr data; +}; - llama_ubatch reserve_ubatch(size_t n_ubatch, bool has_embd = false); +// a helper for sanitizing, fulfilling and splitting a batch +class llama_batch_allocr { +public: + llama_batch_allocr(uint32_t n_pos_per_embd); + + // sanitize and auto-gen missing data in the input batch + // memory is optional. if provided will be used to check for sequence continuity and to determine the positions + bool init( + const llama_batch & batch_inp, + const llama_vocab & vocab, + const llama_memory_i * memory, + uint32_t n_embd, + uint32_t n_seq_max, + bool output_all); - void add_seq_to_ubatch(llama_ubatch & ubatch, llama_sbatch_seq & seq, size_t length); + const llama_batch & get_batch() const; - // simple split, unknown number of sequences of unequal lengths - llama_ubatch split_simple(size_t n_ubatch); + uint32_t get_n_tokens() const; + uint32_t get_n_outputs() const; + uint32_t get_n_used() const; - // make batches of equal-length sequences - llama_ubatch split_equal(size_t n_ubatch); + // the array of output indices in the order they were encountered during the ubatch splitting + std::vector & get_out_ids(); - // sequence-wise split - llama_ubatch split_seq(size_t n_ubatch); + // min/max positions of each sequence in the current ubatch + llama_pos seq_pos_min(llama_seq_id seq_id) const; + llama_pos seq_pos_max(llama_seq_id seq_id) const; - llama_sbatch() = default; - llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple_split = false); -}; + // call once before splitting the batch to reset the internal state + void split_reset(); -// temporary allocate memory for the input batch if needed -class llama_batch_allocr { -public: - llama_batch_allocr(); + // simple split, unknown number of sequence sets of unequal lengths + 
llama_ubatch split_simple(uint32_t n_ubatch); - // optionally fulfill the batch returned by llama_batch_get_one - bool init(const llama_batch & batch_inp, const llama_vocab & vocab, llama_pos p0); + // make ubatches of equal-length sequences sets + // if sequential == true, the tokens in the ubatch will have increasing sequential sequence ids + llama_ubatch split_equal(uint32_t n_ubatch, bool sequential); - const llama_batch & get_batch() const; + // sequence-set-wise split - each ubatch contains a single sequence-set + llama_ubatch split_seq(uint32_t n_ubatch); - uint32_t get_n_outputs() const; + // a helper method for creating a well-defined ubatch of tokens + // TODO: support embeddings if needed in the future + llama_ubatch ubatch_reserve(uint32_t n_seq_tokens, uint32_t n_seqs); private: void clear(); + // create the next ubatch based on the provided batch indices (idxs) and the number of sequence sets (n_seqs) + // return llama_ubatch.n_tokens == 0 if the entire batch was consumed + llama_ubatch ubatch_add(const std::vector & idxs, uint32_t n_seqs, bool equal_seqs); + + // for debugging, start with LLAMA_BATCH_DEBUG=2 + void ubatch_print(const llama_ubatch & ubatch, int debug); + llama_batch batch; + // only for debugging purposes + const llama_vocab * vocab; + + // TODO: this is more of a temporary solution until we have a better way to handle multiple positions per token/embd + // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762 + const uint32_t n_pos_per_embd; + + uint32_t n_embd; + uint32_t n_seq_max; uint32_t n_outputs; std::array seq_id_0 = { 0 }; // default sequence id @@ -101,7 +128,33 @@ class llama_batch_allocr { std::vector pos; std::vector n_seq_id; std::vector seq_id; + std::vector seq_id_unq; + std::vector seq_idx; std::vector output; + using pos_set_t = std::set; + using seq_cpl_t = std::vector; + + // helper flag to quickly determine if there are any coupled sequences in the batch + bool has_cpl = false; + + 
std::vector seq_pos; // seq_pos[s]: the set of positions in sequence s + std::vector seq_cpl; // seq_cpl[s0][s1]: if sequence s0 is coupled to sequence s1 + + using idx_vec_t = std::vector; + using seq_set_t = std::bitset; + + std::vector seq_set; // seq_set[i]: the sequence set of token i + + std::unordered_map seq_set_map; // the indices at which the sequence set appears + + // batch indices of the output + std::vector out_ids; + + uint32_t n_used; + + // used[i] indicates if token i has already been used in a previous ubatch + std::vector used; + int debug; }; diff --git a/src/llama-chat.cpp b/src/llama-chat.cpp index d12743e6b9a0c..240937eceee9d 100644 --- a/src/llama-chat.cpp +++ b/src/llama-chat.cpp @@ -64,6 +64,8 @@ static const std::map LLM_CHAT_TEMPLATES = { { "bailing", LLM_CHAT_TEMPLATE_BAILING }, { "llama4", LLM_CHAT_TEMPLATE_LLAMA4 }, { "smolvlm", LLM_CHAT_TEMPLATE_SMOLVLM }, + { "hunyuan-moe", LLM_CHAT_TEMPLATE_HUNYUAN_MOE }, + { "kimi-k2", LLM_CHAT_TEMPLATE_KIMI_K2 }, }; llm_chat_template llm_chat_template_from_str(const std::string & name) { @@ -169,7 +171,7 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { // ref: https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct/discussions/8#66bae61b1893d14ee8ed85bb // EXAONE-3.0-7.8B-Instruct return LLM_CHAT_TEMPLATE_EXAONE_3; - } else if (tmpl_contains("rwkv-world")) { + } else if (tmpl_contains("rwkv-world") || tmpl_contains("{{- 'User: ' + message['content']|trim + '\\n\\n' -}}")) { return LLM_CHAT_TEMPLATE_RWKV_WORLD; } else if (tmpl_contains("<|start_of_role|>")) { return LLM_CHAT_TEMPLATE_GRANITE; @@ -183,6 +185,12 @@ llm_chat_template llm_chat_detect_template(const std::string & tmpl) { return LLM_CHAT_TEMPLATE_BAILING; } else if (tmpl_contains("<|header_start|>") && tmpl_contains("<|header_end|>")) { return LLM_CHAT_TEMPLATE_LLAMA4; + } else if (tmpl_contains("<|endofuserprompt|>")) { + return LLM_CHAT_TEMPLATE_DOTS1; + } else if (tmpl_contains("<|startoftext|>") && 
tmpl_contains("<|extra_4|>")) { + return LLM_CHAT_TEMPLATE_HUNYUAN_MOE; + } else if (tmpl_contains("<|im_assistant|>assistant<|im_middle|>")) { + return LLM_CHAT_TEMPLATE_KIMI_K2; } return LLM_CHAT_TEMPLATE_UNKNOWN; } @@ -331,7 +339,7 @@ int32_t llm_chat_apply_template( std::string role(message->role); if (role == "system") { // there is no system message for gemma, but we will merge it with user prompt, so nothing is broken - system_prompt = trim(message->content); + system_prompt += trim(message->content); continue; } // in gemma, "assistant" is "model" @@ -353,7 +361,7 @@ int32_t llm_chat_apply_template( std::string role(message->role); if (role == "system") { // there is no system message support, we will merge it with user prompt - system_prompt = message->content; + system_prompt += message->content; continue; } else if (role == "user") { ss << "Human: "; @@ -526,12 +534,17 @@ int32_t llm_chat_apply_template( } } else if (tmpl == LLM_CHAT_TEMPLATE_RWKV_WORLD) { // this template requires the model to have "\n\n" as EOT token - for (auto message : chat) { - std::string role(message->role); - if (role == "user") { - ss << "User: " << message->content << "\n\nAssistant:"; - } else { - ss << message->content << "\n\n"; + for (size_t i = 0; i < chat.size(); i++) { + std::string role(chat[i]->role); + if (role == "system") { + ss << "System: " << trim(chat[i]->content) << "\n\n"; + } else if (role == "user") { + ss << "User: " << trim(chat[i]->content) << "\n\n"; + if (i == chat.size() - 1) { + ss << "Assistant:"; + } + } else if (role == "assistant") { + ss << "Assistant: " << trim(chat[i]->content) << "\n\n"; } } } else if (tmpl == LLM_CHAT_TEMPLATE_GRANITE) { @@ -643,6 +656,53 @@ int32_t llm_chat_apply_template( if (add_ass) { ss << "Assistant:"; } + } else if (tmpl == LLM_CHAT_TEMPLATE_DOTS1) { + // dots.llm1.inst (DOTS1) + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << "<|system|>" << message->content << 
"<|endofsystem|>"; + } else if (role == "user") { + ss << "<|userprompt|>" << message->content << "<|endofuserprompt|>"; + } else { + ss << "<|response|>" << message->content << "<|endofresponse|>"; + } + } + if (add_ass) { + ss << "<|response|>"; + } + } else if (tmpl == LLM_CHAT_TEMPLATE_HUNYUAN_MOE) { + // tencent/Hunyuan-A13B-Instruct + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << "<|startoftext|>" << message->content << "<|extra_4|>"; + } else if (role == "assistant") { + ss << "<|startoftext|>" << message->content << "<|eos|>"; + } else { + ss << "<|startoftext|>" << message->content << "<|extra_0|>"; + } + } + } else if (tmpl == LLM_CHAT_TEMPLATE_KIMI_K2) { + // moonshotai/Kimi-K2-Instruct + for (auto message : chat) { + std::string role(message->role); + if (role == "system") { + ss << "<|im_system|>system<|im_middle|>"; + } else if (role == "user") { + ss << "<|im_user|>user<|im_middle|>"; + } else if (role == "assistant") { + ss << "<|im_assistant|>assistant<|im_middle|>"; + } else if (role == "tool") { + ss << "<|im_system|>tool<|im_middle|>"; + } + + ss << message->content << "<|im_end|>"; + + if (add_ass) { + ss << "<|im_assistant|>assistant<|im_middle|>"; + } + } } else { // template not supported return -1; diff --git a/src/llama-chat.h b/src/llama-chat.h index db24ade21e2ad..cab0533485652 100644 --- a/src/llama-chat.h +++ b/src/llama-chat.h @@ -43,6 +43,9 @@ enum llm_chat_template { LLM_CHAT_TEMPLATE_BAILING, LLM_CHAT_TEMPLATE_LLAMA4, LLM_CHAT_TEMPLATE_SMOLVLM, + LLM_CHAT_TEMPLATE_DOTS1, + LLM_CHAT_TEMPLATE_HUNYUAN_MOE, + LLM_CHAT_TEMPLATE_KIMI_K2, LLM_CHAT_TEMPLATE_UNKNOWN, }; diff --git a/src/llama-context.cpp b/src/llama-context.cpp index ec1e1189b219a..4e1d911593dec 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -20,7 +20,7 @@ llama_context::llama_context( const llama_model & model, llama_context_params params) : model(model), - batch_allocr(std::make_unique()) { + 
balloc(std::make_unique(model.hparams.n_pos_per_embd())) { LLAMA_LOG_INFO("%s: constructing llama_context\n", __func__); t_start_us = model.t_start_us; @@ -29,8 +29,8 @@ llama_context::llama_context( const auto & hparams = model.hparams; cparams.n_seq_max = std::max(1u, params.n_seq_max); - if (cparams.n_seq_max > LLAMA_MAX_PARALLEL_SEQUENCES) { - throw std::runtime_error("n_seq_max must be <= " + std::to_string(LLAMA_MAX_PARALLEL_SEQUENCES)); + if (cparams.n_seq_max > LLAMA_MAX_SEQ) { + throw std::runtime_error("n_seq_max must be <= " + std::to_string(LLAMA_MAX_SEQ)); } cparams.n_threads = params.n_threads; @@ -98,10 +98,20 @@ llama_context::llama_context( LLAMA_LOG_WARN("%s: n_batch is less than GGML_KQ_MASK_PAD - increasing to %d\n", __func__, GGML_KQ_MASK_PAD); cparams.n_batch = GGML_KQ_MASK_PAD; } - cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch); cparams.op_offload = params.op_offload; + cparams.kv_unified = params.kv_unified; + + { + const char * LLAMA_SET_ROWS = getenv("LLAMA_SET_ROWS"); + const bool supports_set_rows = LLAMA_SET_ROWS ? (atoi(LLAMA_SET_ROWS) != 0) : false; + + if (!supports_set_rows && !cparams.kv_unified) { + LLAMA_LOG_WARN("%s: non-unified KV cache requires ggml_set_rows() - forcing unified KV cache\n", __func__); + cparams.kv_unified = true; + } + } const uint32_t n_ctx_per_seq = cparams.n_ctx / cparams.n_seq_max; @@ -112,6 +122,7 @@ llama_context::llama_context( LLAMA_LOG_INFO("%s: n_ubatch = %u\n", __func__, cparams.n_ubatch); LLAMA_LOG_INFO("%s: causal_attn = %d\n", __func__, cparams.causal_attn); LLAMA_LOG_INFO("%s: flash_attn = %d\n", __func__, cparams.flash_attn); + LLAMA_LOG_INFO("%s: kv_unified = %s\n", __func__, cparams.kv_unified ? 
"true" : "false"); LLAMA_LOG_INFO("%s: freq_base = %.1f\n", __func__, cparams.rope_freq_base); LLAMA_LOG_INFO("%s: freq_scale = %g\n", __func__, cparams.rope_freq_scale); @@ -227,8 +238,8 @@ llama_context::llama_context( LLAMA_LOG_DEBUG("%s: max_nodes = %zu\n", __func__, max_nodes); - // buffer used to store the computation graph and the tensor meta data - buf_compute_meta.resize(ggml_tensor_overhead()*max_nodes + ggml_graph_overhead_custom(max_nodes, false)); + gf_res_prev.reset(new llm_graph_result(max_nodes)); + gf_res_reserve.reset(new llm_graph_result(max_nodes)); // TODO: move these checks to ggml_backend_sched // enabling pipeline parallelism in the scheduler increases memory usage, so it is only done when necessary @@ -267,7 +278,7 @@ llama_context::llama_context( // reserve worst-case graph if (!hparams.vocab_only && memory) { - const uint32_t n_seqs = cparams.n_seq_max; + const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max; const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch); LLAMA_LOG_DEBUG("%s: worst-case: n_tokens = %d, n_seqs = %d, n_outputs = %d\n", __func__, n_tokens, n_seqs, n_outputs); @@ -280,8 +291,8 @@ llama_context::llama_context( // simulate full KV cache - const auto mstate = memory->init_full(); - if (!mstate) { + const auto mctx = memory->init_full(); + if (!mctx) { throw std::runtime_error("failed to initialize KV cache"); } @@ -289,7 +300,7 @@ llama_context::llama_context( // reserve pp graph first so that buffers are only allocated once { - auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mstate.get()); + auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get()); if (!gf) { throw std::runtime_error("failed to allocate compute pp buffers"); } @@ -300,7 +311,7 @@ llama_context::llama_context( // reserve with tg graph to get the number of splits and nodes { - auto * gf = graph_reserve(1, 1, 1, mstate.get()); + auto * gf = graph_reserve(n_seqs, n_seqs, n_seqs, mctx.get()); if (!gf) { throw 
std::runtime_error("failed to allocate compute tg buffers"); } @@ -311,7 +322,11 @@ llama_context::llama_context( // reserve again with pp graph to avoid ggml-alloc reallocations during inference { - auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mstate.get()); + // TODO: not sure if the following graph would be worster case for multi-stream KV caches: + // + // auto * gf = graph_reserve(n_tokens, 1, n_tokens, mctx.get()); + // + auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get()); if (!gf) { throw std::runtime_error("failed to allocate compute pp buffers"); } @@ -388,10 +403,6 @@ ggml_backend_sched_t llama_context::get_sched() const { return sched.get(); } -ggml_context * llama_context::get_ctx_compute() const { - return ctx_compute.get(); -} - uint32_t llama_context::n_ctx() const { return cparams.n_ctx; } @@ -444,8 +455,8 @@ bool llama_context::kv_self_update(bool optimize) { optimize |= memory_force_optimize; memory_force_optimize = false; - const auto mstate = memory->init_update(this, optimize); - switch (mstate->get_status()) { + const auto mctx = memory->init_update(this, optimize); + switch (mctx->get_status()) { case LLAMA_MEMORY_STATUS_SUCCESS: { // noop @@ -463,22 +474,27 @@ bool llama_context::kv_self_update(bool optimize) { } } - if (!mstate->apply()) { + // reset the previous graph result to make sure that it won't be reused + // TODO: change the mctx->apply() to return information if a graph reserve is needed + // reset the graph result only if the memory module did reset the scheduler + gf_res_prev->reset(); + + if (!mctx->apply()) { LLAMA_LOG_ERROR("%s: failed to apply memory update\n", __func__); } } // if the memory module did any computation, we have to reserve a new worst-case graph { - const auto mstate = memory->init_full(); - if (!mstate) { - throw std::runtime_error("failed to initialize memory state"); + const auto mctx = memory->init_full(); + if (!mctx) { + throw std::runtime_error("failed to initialize memory 
context"); } - const uint32_t n_seqs = cparams.n_seq_max; + const uint32_t n_seqs = cparams.kv_unified ? 1 : cparams.n_seq_max; const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch); - auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mstate.get()); + auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get()); if (!gf) { LLAMA_LOG_ERROR("%s: failed to reserve graph after the memory update\n", __func__); } @@ -678,38 +694,59 @@ bool llama_context::apply_adapter_cvec( return cvec.apply(model, data, len, n_embd, il_start, il_end); } -llm_graph_result_ptr llama_context::process_ubatch(const llama_ubatch & ubatch, llm_graph_type gtype, llama_memory_state_i * mstate, ggml_status & ret) { - if (mstate && !mstate->apply()) { - LLAMA_LOG_ERROR("%s: failed to apply memory state\n", __func__); +llm_graph_result_i * llama_context::process_ubatch(const llama_ubatch & ubatch, llm_graph_type gtype, llama_memory_context_i * mctx, ggml_status & ret) { + if (mctx && !mctx->apply()) { + LLAMA_LOG_ERROR("%s: failed to apply memory context\n", __func__); ret = GGML_STATUS_FAILED; return nullptr; } - auto * gf = graph_init(); - if (!gf) { - LLAMA_LOG_ERROR("%s: failed to initialize graph\n", __func__); - ret = GGML_STATUS_FAILED; - return nullptr; - } + auto * res = gf_res_prev.get(); + auto * gf = res->get_gf(); - auto res = graph_build(ctx_compute.get(), gf, ubatch, gtype, mstate); - if (!res) { - LLAMA_LOG_ERROR("%s: failed to build graph\n", __func__); - ret = GGML_STATUS_FAILED; - return nullptr; - } + // the new graph parameters + // in order to correctly reuse a graph, it's full topology has to be uniquely determined by these parameters + const auto gparams = graph_params(res, ubatch, mctx, gtype); - // LLAMA_LOG_INFO("graph build time: %.3f ms (%d nodes, %d leafs)\n", (ggml_time_us() - t_start_us)/1000.0, gf->n_nodes, gf->n_leafs); + if (res->can_reuse(gparams)) { + //LLAMA_LOG_DEBUG("%s: reusing previous graph\n", __func__); - if 
(!ggml_backend_sched_alloc_graph(sched.get(), gf)) { - LLAMA_LOG_ERROR("%s: failed to allocate graph\n", __func__); - ret = GGML_STATUS_ALLOC_FAILED; - return nullptr; + n_reused++; + } else { + res->reset(); + + ggml_backend_sched_reset(sched.get()); + ggml_backend_sched_set_eval_callback(sched.get(), cparams.cb_eval, cparams.cb_eval_user_data); + + //const auto t_start_us = ggml_time_us(); + + gf = model.build_graph(gparams); + + //LLAMA_LOG_INFO("graph build time: %.3f ms\n", (ggml_time_us() - t_start_us)/1000.0); + + if (!gf) { + LLAMA_LOG_ERROR("%s: failed to initialize graph\n", __func__); + ret = GGML_STATUS_FAILED; + return nullptr; + } + + if (!ggml_backend_sched_alloc_graph(sched.get(), gf)) { + LLAMA_LOG_ERROR("%s: failed to allocate graph\n", __func__); + ret = GGML_STATUS_ALLOC_FAILED; + return nullptr; + } } - res->set_inputs(&ubatch); + // set the input data for the input tensors + { + //const auto t_start_us = ggml_time_us(); + + res->set_inputs(&ubatch); - const auto status = graph_compute(gf, ubatch.n_tokens > 1); + //LLAMA_LOG_INFO("graph set inputs time: %.3f ms\n", (ggml_time_us() - t_start_us)/1000.0); + } + + const auto status = graph_compute(res->get_gf(), ubatch.n_tokens > 1); if (status != GGML_STATUS_SUCCESS) { LLAMA_LOG_ERROR("%s: failed to compute graph, compute status: %d\n", __func__, status); ret = status; @@ -722,23 +759,29 @@ llm_graph_result_ptr llama_context::process_ubatch(const llama_ubatch & ubatch, } int llama_context::encode(const llama_batch & batch_inp) { + GGML_ASSERT((!batch_inp.token && batch_inp.embd) || (batch_inp.token && !batch_inp.embd)); // NOLINT + if (batch_inp.n_tokens == 0) { LLAMA_LOG_ERROR("%s: n_tokens == 0\n", __func__); return -1; } - // temporary allocate memory for the input batch if needed + const auto & hparams = model.hparams; + + const int64_t n_embd = hparams.n_embd; + const int32_t n_vocab = model.vocab.n_tokens(); + // note: during encode, we always pass the full sequence starting from pos = 0 - 
if (!batch_allocr->init(batch_inp, model.vocab, batch_inp.pos ? -1 : 0)) { + if (!balloc->init(batch_inp, model.vocab, nullptr, n_embd, cparams.kv_unified ? LLAMA_MAX_SEQ : cparams.n_seq_max, true)) { LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__); return -1; } - const llama_batch & batch = batch_allocr->get_batch(); + const uint32_t n_tokens = balloc->get_n_tokens(); - const uint32_t n_tokens = batch.n_tokens; - - GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); // NOLINT + // [TAG_NO_CACHE_PAD] + // TODO: add new split mode where we pad the input sequences so that ubatch.equal_seqs == true + const llama_ubatch ubatch = balloc->split_simple(n_tokens); // micro-batching is not possible for non-causal encoding, so we process the batch in a single shot GGML_ASSERT(cparams.n_ubatch >= n_tokens && "encoder requires n_ubatch >= n_tokens"); @@ -752,14 +795,6 @@ int llama_context::encode(const llama_batch & batch_inp) { n_queued_tokens += n_tokens; - const auto & hparams = model.hparams; - - const int64_t n_embd = hparams.n_embd; - - llama_sbatch sbatch = llama_sbatch(batch, n_embd, /* simple_split */ true); - - const llama_ubatch ubatch = sbatch.split_simple(n_tokens); - // reserve output buffer if (output_reserve(n_tokens) < n_tokens) { LLAMA_LOG_ERROR("%s: could not reserve space for batch with %u outputs\n", __func__, n_tokens); @@ -772,9 +807,6 @@ int llama_context::encode(const llama_batch & batch_inp) { n_outputs = n_tokens; - ggml_backend_sched_reset(sched.get()); - ggml_backend_sched_set_eval_callback(sched.get(), cparams.cb_eval, cparams.cb_eval_user_data); - const auto causal_attn_org = cparams.causal_attn; // always use non-causal attention for encoder graphs @@ -783,7 +815,7 @@ int llama_context::encode(const llama_batch & batch_inp) { cparams.causal_attn = false; ggml_status status; - const auto res = process_ubatch(ubatch, LLM_GRAPH_TYPE_ENCODER, nullptr, status); + const auto * res = process_ubatch(ubatch, 
LLM_GRAPH_TYPE_ENCODER, nullptr, status); cparams.causal_attn = causal_attn_org; @@ -796,10 +828,20 @@ int llama_context::encode(const llama_batch & batch_inp) { } } + auto * t_logits = res->get_logits(); auto * t_embd = res->get_embd_pooled() ? res->get_embd_pooled() : res->get_embd(); + // extract logits + if (logits && t_logits) { + ggml_backend_t backend_res = ggml_backend_sched_get_tensor_backend(sched.get(), t_logits); + GGML_ASSERT(backend_res != nullptr); + GGML_ASSERT(logits != nullptr); + + ggml_backend_tensor_get_async(backend_res, t_logits, logits, 0, n_tokens*n_vocab*sizeof(float)); + } + // extract embeddings - if (t_embd) { + if (embd && t_embd) { ggml_backend_t backend_embd = ggml_backend_sched_get_tensor_backend(sched.get(), t_embd); GGML_ASSERT(backend_embd != nullptr); @@ -818,34 +860,28 @@ int llama_context::encode(const llama_batch & batch_inp) { { // extract sequence embeddings auto & embd_seq_out = embd_seq; - embd_seq_out.clear(); - GGML_ASSERT(!ubatch.equal_seqs); // TODO: handle equal splits + for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) { + const llama_seq_id seq_id = ubatch.seq_id_unq[s]; + const int32_t seq_idx = ubatch.seq_idx[seq_id]; - // TODO: fix indexing [UBATCH_IDX] - for (uint32_t i = 0; i < n_tokens; i++) { - const llama_seq_id seq_id = ubatch.seq_id[i][0]; - if (embd_seq_out.find(seq_id) != embd_seq_out.end()) { - continue; - } embd_seq_out[seq_id].resize(n_embd); - ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_embd*seq_id)*sizeof(float), n_embd*sizeof(float)); + ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_embd*seq_idx)*sizeof(float), n_embd*sizeof(float)); } } break; case LLAMA_POOLING_TYPE_RANK: { // extract the rerank score - n_cls_out floats per sequence auto & embd_seq_out = embd_seq; + const uint32_t n_cls_out = hparams.n_cls_out; - // TODO: fix indexing [UBATCH_IDX] - for (uint32_t s = 0; s < ubatch.n_seqs; ++s) { - const llama_seq_id 
seq_id = ubatch.seq_id[s][0]; - if (embd_seq_out.find(seq_id) != embd_seq_out.end()) { - continue; - } + for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) { + const llama_seq_id seq_id = ubatch.seq_id_unq[s]; + const int32_t seq_idx = ubatch.seq_idx[seq_id]; + embd_seq_out[seq_id].resize(n_cls_out); - ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_cls_out*seq_id)*sizeof(float), n_cls_out*sizeof(float)); + ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_cls_out*seq_idx)*sizeof(float), n_cls_out*sizeof(float)); } } break; case LLAMA_POOLING_TYPE_UNSPECIFIED: @@ -855,10 +891,6 @@ int llama_context::encode(const llama_batch & batch_inp) { } } - // Reset state for the next token before backend sync, to allow the CPU activities in the reset to - // overlap with device computation. - ggml_backend_sched_reset(sched.get()); - // TODO: hacky solution if (model.arch == LLM_ARCH_T5 && t_embd) { //cross.t_embd = t_embd; @@ -870,12 +902,16 @@ int llama_context::encode(const llama_batch & batch_inp) { cross.v_embd.resize(cross.n_embd*cross.n_enc); memcpy(cross.v_embd.data(), embd, ggml_nbytes(t_embd)); + const auto & batch = balloc->get_batch(); + // remember the sequence ids used during the encoding - needed for cross attention later cross.seq_ids_enc.resize(n_tokens); for (uint32_t i = 0; i < n_tokens; i++) { cross.seq_ids_enc[i].clear(); + for (int s = 0; s < batch.n_seq_id[i]; s++) { - llama_seq_id seq_id = batch.seq_id[i][s]; + const llama_seq_id seq_id = batch.seq_id[i][s]; + cross.seq_ids_enc[i].insert(seq_id); } } @@ -885,6 +921,8 @@ int llama_context::encode(const llama_batch & batch_inp) { } int llama_context::decode(const llama_batch & batch_inp) { + GGML_ASSERT((!batch_inp.token && batch_inp.embd) || (batch_inp.token && !batch_inp.embd)); // NOLINT + if (!memory) { LLAMA_LOG_DEBUG("%s: cannot decode batches with this context (calling encode() instead)\n", __func__); return encode(batch_inp); @@ 
-895,30 +933,24 @@ int llama_context::decode(const llama_batch & batch_inp) { return -1; } - // temporary allocate memory for the input batch if needed - if (!batch_allocr->init(batch_inp, model.vocab, batch_inp.pos ? -1 : memory->seq_pos_max(0) + 1)) { - LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__); - return -1; - } - - const llama_batch & batch = batch_allocr->get_batch(); - const auto & vocab = model.vocab; const auto & hparams = model.hparams; const int32_t n_vocab = vocab.n_tokens(); const int64_t n_embd = hparams.n_embd; - const uint32_t n_tokens_all = batch.n_tokens; + // when computing embeddings, all tokens are output + const bool output_all = cparams.embeddings; - GGML_ASSERT((!batch.token && batch.embd) || (batch.token && !batch.embd)); // NOLINT - - // this indicates we are doing pooled embedding - const bool embd_pooled = cparams.embeddings && cparams.pooling_type != LLAMA_POOLING_TYPE_NONE; + if (!balloc->init(batch_inp, vocab, memory.get(), n_embd, cparams.kv_unified ? 
LLAMA_MAX_SEQ : cparams.n_seq_max, output_all)) { + LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__); + return -1; + } - const uint32_t n_outputs_all = batch_allocr->get_n_outputs(); + const uint32_t n_tokens_all = balloc->get_n_tokens(); + const uint32_t n_outputs_all = balloc->get_n_outputs(); - if (embd_pooled) { + if (output_all) { // require that all tokens are output if (n_outputs_all != n_tokens_all) { LLAMA_LOG_ERROR("%s: pooled embedding requires that all tokens are output (n_outputs_all = %d, n_tokens_all = %d)\n", @@ -944,21 +976,21 @@ int llama_context::decode(const llama_batch & batch_inp) { // handle any pending defrags/shifts kv_self_update(false); - llama_memory_state_ptr mstate; + llama_memory_context_ptr mctx; while (true) { - mstate = memory->init_batch(batch, cparams.n_ubatch, embd_pooled); - if (!mstate) { + mctx = memory->init_batch(*balloc, cparams.n_ubatch, output_all); + if (!mctx) { return -2; } - switch (mstate->get_status()) { + switch (mctx->get_status()) { case LLAMA_MEMORY_STATUS_SUCCESS: { } break; case LLAMA_MEMORY_STATUS_NO_UPDATE: { - LLAMA_LOG_ERROR("%s: unexpected memory state status: %d\n", __func__, mstate->get_status()); + LLAMA_LOG_ERROR("%s: unexpected memory context status: %d\n", __func__, mctx->get_status()); return -2; } @@ -968,19 +1000,19 @@ int llama_context::decode(const llama_batch & batch_inp) { did_optimize = true; if (kv_self_update(true)) { - LLAMA_LOG_DEBUG("%s: retrying batch size %d after cache optimization\n", __func__, batch.n_tokens); + LLAMA_LOG_DEBUG("%s: retrying batch size %d after cache optimization\n", __func__, balloc->get_n_tokens()); continue; } } - LLAMA_LOG_WARN("%s: failed to find a memory slot for batch of size %d\n", __func__, batch.n_tokens); + LLAMA_LOG_WARN("%s: failed to find a memory slot for batch of size %d\n", __func__, balloc->get_n_tokens()); return 1; } case LLAMA_MEMORY_STATUS_FAILED_COMPUTE: { - LLAMA_LOG_ERROR("%s: compute failed while preparing batch of size 
%d\n", __func__, batch.n_tokens); + LLAMA_LOG_ERROR("%s: compute failed while preparing batch of size %d\n", __func__, balloc->get_n_tokens()); return -2; } @@ -998,7 +1030,7 @@ int llama_context::decode(const llama_batch & batch_inp) { int64_t n_outputs_prev = 0; do { - const auto & ubatch = mstate->get_ubatch(); + const auto & ubatch = mctx->get_ubatch(); // count the outputs in this ubatch { @@ -1007,7 +1039,6 @@ int llama_context::decode(const llama_batch & batch_inp) { if (n_outputs_all == n_tokens_all) { n_outputs_new = ubatch.n_tokens; } else { - GGML_ASSERT(ubatch.output); for (uint32_t i = 0; i < ubatch.n_tokens; i++) { n_outputs_new += (int32_t) (ubatch.output[i] != 0); } @@ -1017,27 +1048,23 @@ int llama_context::decode(const llama_batch & batch_inp) { n_outputs = n_outputs_new; } - ggml_backend_sched_reset(sched.get()); - ggml_backend_sched_set_eval_callback(sched.get(), cparams.cb_eval, cparams.cb_eval_user_data); - ggml_status status; - const auto res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mstate.get(), status); + const auto * res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get(), status); if (!res) { // the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache - llama_pos pos_min[LLAMA_MAX_PARALLEL_SEQUENCES]; - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + llama_pos pos_min[LLAMA_MAX_SEQ]; + for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { pos_min[s] = std::numeric_limits::max(); } - // TODO: fix sequence indexing for (uint32_t i = 0; i < ubatch.n_tokens; ++i) { const auto & seq_id = ubatch.seq_id[i][0]; pos_min[seq_id] = std::min(pos_min[seq_id], ubatch.pos[i]); } - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { if (pos_min[s] == std::numeric_limits::max()) { continue; } @@ -1060,7 +1087,7 @@ int llama_context::decode(const llama_batch & batch_inp) { // ggml_graph_dump_dot(gf, NULL, "llama.dot"); //} - auto * t_logits = 
cparams.embeddings ? nullptr : res->get_logits(); + auto * t_logits = res->get_logits(); auto * t_embd = cparams.embeddings ? res->get_embd() : nullptr; if (t_embd && res->get_embd_pooled()) { @@ -1107,27 +1134,27 @@ int llama_context::decode(const llama_batch & batch_inp) { // extract sequence embeddings (cleared before processing each batch) auto & embd_seq_out = embd_seq; - for (uint32_t s = 0; s < ubatch.n_seqs; ++s) { - const llama_seq_id seq_id = ubatch.seq_id[s][0]; - if (embd_seq_out.find(seq_id) != embd_seq_out.end()) { - continue; - } + for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) { + const llama_seq_id seq_id = ubatch.seq_id_unq[s]; + const int32_t seq_idx = ubatch.seq_idx[seq_id]; + embd_seq_out[seq_id].resize(n_embd); - ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_embd*seq_id)*sizeof(float), n_embd*sizeof(float)); + ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_embd*seq_idx)*sizeof(float), n_embd*sizeof(float)); } } break; case LLAMA_POOLING_TYPE_RANK: { - // extract the rerank score - a single float per sequence + // extract the rerank score - n_cls_out floats per sequence auto & embd_seq_out = embd_seq; - for (uint32_t s = 0; s < ubatch.n_seqs; ++s) { - const llama_seq_id seq_id = ubatch.seq_id[s][0]; - if (embd_seq_out.find(seq_id) != embd_seq_out.end()) { - continue; - } - embd_seq_out[seq_id].resize(1); - ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (seq_id)*sizeof(float), sizeof(float)); + const uint32_t n_cls_out = hparams.n_cls_out; + + for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) { + const llama_seq_id seq_id = ubatch.seq_id_unq[s]; + const int32_t seq_idx = ubatch.seq_idx[seq_id]; + + embd_seq_out[seq_id].resize(n_cls_out); + ggml_backend_tensor_get_async(backend_embd, t_embd, embd_seq_out[seq_id].data(), (n_cls_out*seq_idx)*sizeof(float), n_cls_out*sizeof(float)); } } break; case LLAMA_POOLING_TYPE_UNSPECIFIED: @@ -1138,7 
+1165,7 @@ int llama_context::decode(const llama_batch & batch_inp) { } n_outputs_prev += n_outputs; - } while (mstate->next()); + } while (mctx->next()); // set to total number of outputs in the batch, for use in llama_get_logits_ith n_outputs = n_outputs_all; @@ -1147,7 +1174,7 @@ int llama_context::decode(const llama_batch & batch_inp) { if (n_outputs > 0) { bool sorted_output = true; - auto & out_ids = mstate->out_ids(); + auto & out_ids = balloc->get_out_ids(); GGML_ASSERT(out_ids.size() == (size_t) n_outputs); @@ -1203,10 +1230,6 @@ int llama_context::decode(const llama_batch & batch_inp) { // wait for the computation to finish (automatically done when obtaining the model output) //synchronize(); - // Reset state for the next token before backend sync, to allow the CPU activities in the reset to - // overlap with device computation. - ggml_backend_sched_reset(sched.get()); - return 0; } @@ -1224,9 +1247,8 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) { const auto n_vocab = vocab.n_tokens(); const auto n_embd = hparams.n_embd; - // TODO: use a per-batch flag for logits presence instead - bool has_logits = !cparams.embeddings; - bool has_embd = cparams.embeddings && (cparams.pooling_type == LLAMA_POOLING_TYPE_NONE); + bool has_logits = true; + bool has_embd = cparams.embeddings; // TODO: hacky enc-dec support if (model.arch == LLM_ARCH_T5) { @@ -1289,23 +1311,15 @@ uint32_t llama_context::output_reserve(int32_t n_outputs) { // graph // -int32_t llama_context::graph_max_nodes() const { - return std::max(65536, 5*model.n_tensors()); +uint32_t llama_context::graph_max_nodes() const { + return std::max(65536u, 5u*model.n_tensors()); } -ggml_cgraph * llama_context::graph_init() { - ggml_init_params params = { - /*.mem_size =*/ buf_compute_meta.size(), - /*.mem_buffer =*/ buf_compute_meta.data(), - /*.no_alloc =*/ true, - }; - - ctx_compute.reset(ggml_init(params)); - - return ggml_new_graph_custom(ctx_compute.get(), graph_max_nodes(), false); 
+llm_graph_result * llama_context::get_gf_res_reserve() const { + return static_cast(gf_res_reserve.get()); } -ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_state_i * mstate) { +ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_context_i * mctx) { LLAMA_LOG_DEBUG("%s: reserving a graph for ubatch with n_tokens = %4u, n_seqs = %2u, n_outputs = %4u\n", __func__, n_tokens, n_seqs, n_outputs); if (n_tokens % n_seqs != 0) { @@ -1315,26 +1329,29 @@ ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, u LLAMA_LOG_DEBUG("%s: making n_tokens a multiple of n_seqs - n_tokens = %u, n_seqs = %u, n_outputs = %u\n", __func__, n_tokens, n_seqs, n_outputs); } + ggml_backend_sched_reset(sched.get()); + + // when the scheduler is reset, we cannnot reuse the old graph, so we reset the previous graph result to prevent that + gf_res_prev->reset(); + // store the n_outputs as it is, and restore it afterwards // TODO: not sure if needed, might simplify in the future by removing this const auto save_n_outputs = this->n_outputs; this->n_outputs = n_outputs; - llama_token token = model.vocab.token_bos(); // not actually used by llama_build_graph, but required to choose between token and embedding inputs graph - llama_ubatch ubatch = { true, n_tokens, n_tokens / n_seqs, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr}; + llama_batch_allocr balloc(model.hparams.n_pos_per_embd()); + llama_ubatch ubatch = balloc.ubatch_reserve(n_tokens/n_seqs, n_seqs); - auto * gf = graph_init(); - auto res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_DEFAULT, mstate); + auto * res = gf_res_reserve.get(); - this->n_outputs = save_n_outputs; + const auto gparams = graph_params(res, ubatch, mctx, LLM_GRAPH_TYPE_DEFAULT); - if (!res) { - LLAMA_LOG_ERROR("%s: failed to build worst-case graph\n", __func__); - return nullptr; - 
} + res->reset(); - ggml_backend_sched_reset(sched.get()); + auto * gf = model.build_graph(gparams); + + this->n_outputs = save_n_outputs; // initialize scheduler with the specified graph if (!ggml_backend_sched_reserve(sched.get(), gf)) { @@ -1345,28 +1362,27 @@ ggml_cgraph * llama_context::graph_reserve(uint32_t n_tokens, uint32_t n_seqs, u return gf; } -llm_graph_result_ptr llama_context::graph_build( - ggml_context * ctx, - ggml_cgraph * gf, - const llama_ubatch & ubatch, - llm_graph_type gtype, - const llama_memory_state_i * mstate) { - return model.build_graph( - { - /*.ctx =*/ ctx, - /*.arch =*/ model.arch, - /*.hparams =*/ model.hparams, - /*.cparams =*/ cparams, - /*.ubatch =*/ ubatch, - /*.sched =*/ sched.get(), - /*.backend_cpu =*/ backend_cpu, - /*.cvec =*/ &cvec, - /*.loras =*/ &loras, - /*.mstate =*/ mstate, - /*.cross =*/ &cross, - /*.n_outputs =*/ n_outputs, - /*.cb =*/ graph_get_cb(), - }, gf, gtype); +llm_graph_params llama_context::graph_params( + llm_graph_result_i * res, + const llama_ubatch & ubatch, + const llama_memory_context_i * mctx, + llm_graph_type gtype) const { + return { + /*.arch =*/ model.arch, + /*.hparams =*/ model.hparams, + /*.cparams =*/ cparams, + /*.ubatch =*/ ubatch, + /*.gtype =*/ gtype, + /*.sched =*/ sched.get(), + /*.backend_cpu =*/ backend_cpu, + /*.cvec =*/ &cvec, + /*.loras =*/ &loras, + /*.mctx =*/ mctx, + /*.cross =*/ &cross, + /*.n_outputs =*/ n_outputs, + /*.cb =*/ graph_get_cb(), + /*.res =*/ res, + }; } ggml_status llama_context::graph_compute( @@ -1944,6 +1960,7 @@ llama_perf_context_data llama_context::perf_get_data() const { data.t_eval_ms = 1e-3 * t_eval_us; data.n_p_eval = std::max(1, n_p_eval); data.n_eval = std::max(1, n_eval); + data.n_reused = std::max(0, n_reused); return data; } @@ -1952,6 +1969,7 @@ void llama_context::perf_reset() { t_start_us = ggml_time_us(); t_eval_us = n_eval = 0; t_p_eval_us = n_p_eval = 0; + n_reused = 0; } // @@ -2042,19 +2060,21 @@ void llama_context::opt_epoch_iter( 
batch.logits [pos_batch] = true; } - const auto n_tokens_all = batch.n_tokens; + if (!balloc->init(batch, model.vocab, nullptr, model.hparams.n_embd, cparams.kv_unified ? LLAMA_MAX_SEQ : cparams.n_seq_max, true)) { + LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__); + return; + } - n_queued_tokens += n_tokens_all; + const uint32_t n_tokens_all = balloc->get_n_tokens(); - // this indicates we are doing pooled embedding - const bool embd_pooled = cparams.embeddings && cparams.pooling_type != LLAMA_POOLING_TYPE_NONE; + n_queued_tokens += n_tokens_all; embd_seq.clear(); uint32_t n_outputs_all = n_tokens_all; - auto mstate = memory->init_batch(batch, cparams.n_ubatch, embd_pooled); - if (!mstate || mstate->get_status() != LLAMA_MEMORY_STATUS_SUCCESS) { + auto mctx = memory->init_batch(*balloc, cparams.n_ubatch, true); + if (!mctx || mctx->get_status() != LLAMA_MEMORY_STATUS_SUCCESS) { LLAMA_LOG_ERROR("%s: could not initialize batch\n", __func__); break; } @@ -2067,17 +2087,22 @@ void llama_context::opt_epoch_iter( uint32_t pos_batch = 0; do { - const auto & ubatch = mstate->get_ubatch(); + const auto & ubatch = mctx->get_ubatch(); n_outputs = ubatch.n_tokens; - if (!mstate->apply()) { - LLAMA_LOG_ERROR("%s: failed to update the memory state\n", __func__); + if (!mctx->apply()) { + LLAMA_LOG_ERROR("%s: failed to update the memory context\n", __func__); break; } - auto * gf = graph_init(); - auto res = graph_build(ctx_compute.get(), gf, ubatch, LLM_GRAPH_TYPE_DEFAULT, mstate.get()); + auto * res = gf_res_prev.get(); + + const auto gparams = graph_params(res, ubatch, mctx.get(), LLM_GRAPH_TYPE_DEFAULT); + + res->reset(); + + auto * gf = model.build_graph(gparams); struct ggml_context * ctx_compute_opt; { @@ -2112,7 +2137,7 @@ void llama_context::opt_epoch_iter( ggml_free(ctx_compute_opt); pos_batch += ubatch.n_tokens; - } while (mstate->next()); + } while (mctx->next()); } } @@ -2199,6 +2224,7 @@ llama_context_params llama_context_default_params() { /*.no_perf 
=*/ true, /*.op_offload =*/ true, /*.swa_full =*/ true, + /*.kv_unified =*/ false, }; return result; @@ -2819,6 +2845,7 @@ void llama_perf_context_print(const llama_context * ctx) { LLAMA_LOG_INFO("%s: eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)\n", __func__, data.t_eval_ms, data.n_eval, data.t_eval_ms / data.n_eval, 1e3 / data.t_eval_ms * data.n_eval); LLAMA_LOG_INFO("%s: total time = %10.2f ms / %5d tokens\n", __func__, (t_end_ms - data.t_start_ms), (data.n_p_eval + data.n_eval)); + LLAMA_LOG_INFO("%s: graphs reused = %10d\n", __func__, data.n_reused); } void llama_perf_context_reset(llama_context * ctx) { diff --git a/src/llama-context.h b/src/llama-context.h index 040f03ae42e65..fd480af6ec875 100644 --- a/src/llama-context.h +++ b/src/llama-context.h @@ -18,7 +18,7 @@ class llama_io_read_i; class llama_io_write_i; struct llama_memory_i; -struct llama_memory_state_i; +struct llama_memory_context_i; struct llama_context { // init scheduler and compute buffers, reserve worst-case graphs @@ -35,8 +35,6 @@ struct llama_context { ggml_backend_sched_t get_sched() const; - ggml_context * get_ctx_compute() const; - uint32_t n_ctx() const; uint32_t n_ctx_per_seq() const; uint32_t n_batch() const; @@ -93,14 +91,14 @@ struct llama_context { int32_t il_end); // process a single ubatch with a specific graph type - // if memory_state is provided, it will be applied first to the context's memory + // if memory_context is provided, it will be applied first to the context's memory // ret contains the status of the graph computation // returns nullptr only if ret != GGML_STATUS_SUCCESS - llm_graph_result_ptr process_ubatch( - const llama_ubatch & ubatch, - llm_graph_type gtype, - llama_memory_state_i * mstate, - ggml_status & ret); + llm_graph_result_i * process_ubatch( + const llama_ubatch & ubatch, + llm_graph_type gtype, + llama_memory_context_i * mctx, + ggml_status & ret); int encode(const llama_batch & batch_inp); int decode(const 
llama_batch & batch_inp); @@ -188,24 +186,23 @@ struct llama_context { // public: - int32_t graph_max_nodes() const; + uint32_t graph_max_nodes() const; - // zero-out inputs and create the ctx_compute for the compute graph - ggml_cgraph * graph_init(); + // can reuse the llm_graph_result instance of the context (for example to update a memory module) + llm_graph_result * get_gf_res_reserve() const; // returns the result of ggml_backend_sched_graph_compute_async execution ggml_status graph_compute(ggml_cgraph * gf, bool batched); // reserve a graph with a dummy ubatch of the specified size - ggml_cgraph * graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_state_i * mstate); + ggml_cgraph * graph_reserve(uint32_t n_tokens, uint32_t n_seqs, uint32_t n_outputs, const llama_memory_context_i * mctx); private: - llm_graph_result_ptr graph_build( - ggml_context * ctx, - ggml_cgraph * gf, - const llama_ubatch & ubatch, - llm_graph_type gtype, - const llama_memory_state_i * mstate); + llm_graph_params graph_params( + llm_graph_result_i * res, + const llama_ubatch & ubatch, + const llama_memory_context_i * mctx, + llm_graph_type gtype) const; llm_graph_cb graph_get_cb() const; @@ -247,7 +244,7 @@ struct llama_context { std::map> embd_seq; // reuse the batch_allocr to avoid unnecessary memory allocations - std::unique_ptr batch_allocr; + std::unique_ptr balloc; uint32_t n_outputs = 0; // number of actually-used outputs in the current ubatch or last logical batch @@ -258,8 +255,6 @@ struct llama_context { ggml_backend_t backend_cpu = nullptr; std::vector backends; - ggml_context_ptr ctx_compute; - // training ggml_opt_context_t opt_ctx = nullptr; @@ -275,8 +270,8 @@ struct llama_context { std::vector backend_ptrs; std::vector backend_buft; - // memory buffers used to evaluate the model - std::vector buf_compute_meta; + llm_graph_result_ptr gf_res_prev; + llm_graph_result_ptr gf_res_reserve; // host buffer for the model output (logits and 
embeddings) ggml_backend_buffer_ptr buf_output; @@ -294,4 +289,6 @@ struct llama_context { mutable int32_t n_p_eval = 0; // number of tokens in eval calls for the prompt (with batch size > 1) mutable int32_t n_eval = 0; // number of eval calls + + mutable int32_t n_reused = 0; // number of times the previous graph was reused }; diff --git a/src/llama-cparams.cpp b/src/llama-cparams.cpp index f7b36590fe3e3..a3e7a37ee36d7 100644 --- a/src/llama-cparams.cpp +++ b/src/llama-cparams.cpp @@ -1,5 +1,5 @@ #include "llama-cparams.h" size_t llama_max_parallel_sequences(void) { - return LLAMA_MAX_PARALLEL_SEQUENCES; + return LLAMA_MAX_SEQ; } diff --git a/src/llama-cparams.h b/src/llama-cparams.h index 2871031ef0961..38750affc500b 100644 --- a/src/llama-cparams.h +++ b/src/llama-cparams.h @@ -4,15 +4,15 @@ #include -#define LLAMA_MAX_PARALLEL_SEQUENCES 64 +#define LLAMA_MAX_SEQ 64 struct llama_cparams { uint32_t n_ctx; // context size used during inference uint32_t n_batch; uint32_t n_ubatch; uint32_t n_seq_max; - int n_threads; // number of threads to use for generation - int n_threads_batch; // number of threads to use for batch processing + int32_t n_threads; // number of threads to use for generation + int32_t n_threads_batch; // number of threads to use for batch processing float rope_freq_base; float rope_freq_scale; @@ -33,6 +33,7 @@ struct llama_cparams { bool no_perf; bool warmup; bool op_offload; + bool kv_unified; enum llama_pooling_type pooling_type; diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 337fb5cb0df36..e27f78c2a6811 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -6,7 +6,8 @@ #include "llama-kv-cache-unified.h" #include "llama-kv-cache-unified-iswa.h" -#include "llama-kv-cache-recurrent.h" +#include "llama-memory-hybrid.h" +#include "llama-memory-recurrent.h" #include #include @@ -27,6 +28,15 @@ void llm_graph_input_embd::set_input(const llama_ubatch * ubatch) { } } +bool llm_graph_input_embd::can_reuse(const llm_graph_params & 
params) { + bool res = true; + + res &= (!tokens && !params.ubatch.token) || (tokens && tokens->ne[0] == params.ubatch.n_tokens); + res &= (!embd && !params.ubatch.embd) || (embd && embd->ne[0] == params.ubatch.n_tokens); + + return res; +} + void llm_graph_input_pos::set_input(const llama_ubatch * ubatch) { if (ubatch->pos && pos) { const int64_t n_tokens = ubatch->n_tokens; @@ -49,6 +59,14 @@ void llm_graph_input_pos::set_input(const llama_ubatch * ubatch) { } } +bool llm_graph_input_pos::can_reuse(const llm_graph_params & params) { + bool res = true; + + res &= pos->ne[0] == params.ubatch.n_tokens; + + return res; +} + void llm_graph_input_attn_temp::set_input(const llama_ubatch * ubatch) { if (ubatch->pos && attn_scale) { const int64_t n_tokens = ubatch->n_tokens; @@ -70,7 +88,7 @@ void llm_graph_input_pos_bucket::set_input(const llama_ubatch * ubatch) { const int64_t n_tokens = ubatch->n_tokens; GGML_ASSERT(ggml_backend_buffer_is_host(pos_bucket->buffer)); - GGML_ASSERT(!ubatch->equal_seqs); // TODO: use ubatch->n_seqs instead of failing + GGML_ASSERT(!ubatch->equal_seqs()); // TODO: use ubatch->n_seqs instead of failing int32_t * data = (int32_t *) pos_bucket->data; @@ -86,170 +104,157 @@ void llm_graph_input_pos_bucket::set_input(const llama_ubatch * ubatch) { void llm_graph_input_pos_bucket_kv::set_input(const llama_ubatch * ubatch) { if (pos_bucket) { - kv_state->set_input_pos_bucket(pos_bucket, ubatch); + mctx->set_input_pos_bucket(pos_bucket, ubatch); } } void llm_graph_input_out_ids::set_input(const llama_ubatch * ubatch) { - if (hparams.causal_attn || cparams.pooling_type == LLAMA_POOLING_TYPE_NONE) { - //GGML_ASSERT(out_ids && "every model that can must skip unused outputs"); + GGML_ASSERT(out_ids); - if (!out_ids) { - LLAMA_LOG_WARN("%s: 'out_ids' is not created\n", __func__); - } else { - const int64_t n_tokens = ubatch->n_tokens; + const int64_t n_tokens = ubatch->n_tokens; - GGML_ASSERT(ggml_backend_buffer_is_host(out_ids->buffer)); - int32_t * 
data = (int32_t *) out_ids->data; + GGML_ASSERT(ggml_backend_buffer_is_host(out_ids->buffer)); + int32_t * data = (int32_t *) out_ids->data; - if (n_outputs == n_tokens) { - for (int i = 0; i < n_tokens; ++i) { - data[i] = i; - } - } else if (ubatch->output) { - int32_t n_outputs = 0; - for (int i = 0; i < n_tokens; ++i) { - if (ubatch->output[i]) { - data[n_outputs++] = i; - } - } - // the graph needs to have been passed the correct number of outputs - GGML_ASSERT(n_outputs == n_outputs); - } else if (n_outputs == 1) { - // only keep last output - data[0] = n_tokens - 1; - } else { - GGML_ASSERT(n_outputs == 0); - } + if (n_outputs == n_tokens) { + for (int i = 0; i < n_tokens; ++i) { + data[i] = i; + } + + return; + } + + GGML_ASSERT(ubatch->output); + + int n_outputs = 0; + + for (int i = 0; i < n_tokens; ++i) { + if (ubatch->output[i]) { + data[n_outputs++] = i; } } } +bool llm_graph_input_out_ids::can_reuse(const llm_graph_params & params) { + bool res = true; + + res &= n_outputs == params.n_outputs; + + return res; +} + void llm_graph_input_mean::set_input(const llama_ubatch * ubatch) { if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_MEAN) { const int64_t n_tokens = ubatch->n_tokens; const int64_t n_seq_tokens = ubatch->n_seq_tokens; - const int64_t n_seqs = ubatch->n_seqs; + const int64_t n_seqs_unq = ubatch->n_seqs_unq; GGML_ASSERT(mean); GGML_ASSERT(ggml_backend_buffer_is_host(mean->buffer)); float * data = (float *) mean->data; - memset(mean->data, 0, n_tokens * n_tokens * ggml_element_size(mean)); - - std::vector sum(n_tokens, 0); + memset(mean->data, 0, n_tokens*n_seqs_unq*ggml_element_size(mean)); - // TODO: fix indexing [UBATCH_IDX] - for (int s = 0; s < n_seqs; ++s) { - const llama_seq_id seq_id = ubatch->seq_id[s][0]; + std::vector sums(n_seqs_unq, 0); + for (int i = 0; i < n_tokens; i += n_seq_tokens) { + for (int s = 0; s < ubatch->n_seq_id[i]; ++s) { + const llama_seq_id seq_id = ubatch->seq_id[i][s]; + const int32_t seq_idx 
= ubatch->seq_idx[seq_id]; - // TODO: adapt limits to n_seqs when ubatch->equal_seqs is true - GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == MEAN"); - - sum[seq_id] += ubatch->n_seq_tokens; + sums[seq_idx] += ubatch->n_seq_tokens; + } } - std::vector div(n_tokens, 0.0f); - for (int i = 0; i < n_tokens; ++i) { - const uint64_t s = sum[i]; - if (s > 0) { - div[i] = 1.0f/float(s); + std::vector div(n_seqs_unq, 0.0f); + for (int s = 0; s < n_seqs_unq; ++s) { + const uint64_t sum = sums[s]; + if (sum > 0) { + div[s] = 1.0f/float(sum); } } - // TODO: fix indexing [UBATCH_IDX] - for (int s = 0; s < n_seqs; ++s) { - const llama_seq_id seq_id = ubatch->seq_id[s][0]; + for (int i = 0; i < n_tokens; i += n_seq_tokens) { + for (int s = 0; s < ubatch->n_seq_id[i]; ++s) { + const llama_seq_id seq_id = ubatch->seq_id[i][s]; + const int32_t seq_idx = ubatch->seq_idx[seq_id]; - for (int i = 0; i < n_seq_tokens; ++i) { - data[seq_id*n_tokens + s*n_seq_tokens + i] = div[seq_id]; + for (int j = 0; j < n_seq_tokens; ++j) { + data[seq_idx*n_tokens + i + j] = div[seq_idx]; + } } } } } void llm_graph_input_cls::set_input(const llama_ubatch * ubatch) { - if (cparams.embeddings && ( - cparams.pooling_type == LLAMA_POOLING_TYPE_CLS || - cparams.pooling_type == LLAMA_POOLING_TYPE_RANK)) { - const int64_t n_tokens = ubatch->n_tokens; - const int64_t n_seq_tokens = ubatch->n_seq_tokens; - const int64_t n_seqs = ubatch->n_seqs; + const int64_t n_tokens = ubatch->n_tokens; + const int64_t n_seq_tokens = ubatch->n_seq_tokens; + const int64_t n_seqs_unq = ubatch->n_seqs_unq; + if (cparams.embeddings && ( + cparams.pooling_type == LLAMA_POOLING_TYPE_CLS || + cparams.pooling_type == LLAMA_POOLING_TYPE_RANK + )) { GGML_ASSERT(cls); GGML_ASSERT(ggml_backend_buffer_is_host(cls->buffer)); uint32_t * data = (uint32_t *) cls->data; - memset(cls->data, 0, n_tokens * ggml_element_size(cls)); - - // TODO: fix indexing [UBATCH_IDX] - for (int s = 0; s < n_seqs; 
++s) { - const llama_seq_id seq_id = ubatch->seq_id[s][0]; + memset(cls->data, 0, n_seqs_unq*ggml_element_size(cls)); - // TODO: adapt limits to n_seqs when ubatch->equal_seqs is true - GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == CLS or RANK"); + for (int i = 0; i < n_tokens; i += n_seq_tokens) { + for (int s = 0; s < ubatch->n_seq_id[i]; ++s) { + const llama_seq_id seq_id = ubatch->seq_id[i][s]; + const int32_t seq_idx = ubatch->seq_idx[seq_id]; - for (int i = 0; i < n_seq_tokens; ++i) { - const llama_pos pos = ubatch->pos[s*n_seq_tokens + i]; - - if (pos == 0) { - data[seq_id] = s*n_seq_tokens + i; - } + data[seq_idx] = i; } } } if (cparams.embeddings && cparams.pooling_type == LLAMA_POOLING_TYPE_LAST) { - const int64_t n_tokens = ubatch->n_tokens; - const int64_t n_seq_tokens = ubatch->n_seq_tokens; - const int64_t n_seqs = ubatch->n_seqs; - GGML_ASSERT(cls); GGML_ASSERT(ggml_backend_buffer_is_host(cls->buffer)); uint32_t * data = (uint32_t *) cls->data; - memset(cls->data, 0, n_tokens * ggml_element_size(cls)); - - std::vector last_pos(n_tokens, -1); - std::vector last_row(n_tokens, -1); + memset(cls->data, 0, n_seqs_unq*ggml_element_size(cls)); - // TODO: fix indexing [UBATCH_IDX] - for (int s = 0; s < n_seqs; ++s) { - const llama_seq_id seq_id = ubatch->seq_id[s][0]; + std::vector last_pos(n_seqs_unq, -1); + std::vector last_row(n_seqs_unq, -1); - // TODO: adapt limits to n_seqs when ubatch->equal_seqs is true - GGML_ASSERT(seq_id < n_tokens && "seq_id cannot be larger than n_tokens with pooling_type == LAST"); + for (int i = 0; i < n_tokens; ++i) { + const llama_pos pos = ubatch->pos[i]; - for (int i = 0; i < n_seq_tokens; ++i) { - const llama_pos pos = ubatch->pos[s*n_seq_tokens + i]; + for (int s = 0; s < ubatch->n_seq_id[i]; ++s) { + const llama_seq_id seq_id = ubatch->seq_id[i][s]; + const int32_t seq_idx = ubatch->seq_idx[seq_id]; - if (pos >= last_pos[seq_id]) { - last_pos[seq_id] = pos; - 
last_row[seq_id] = s*n_seq_tokens + i; + if (pos >= last_pos[seq_idx]) { + last_pos[seq_idx] = pos; + last_row[seq_idx] = i; } } } - for (int i = 0; i < n_tokens; ++i) { - if (last_row[i] >= 0) { - data[i] = last_row[i]; + for (int s = 0; s < n_seqs_unq; ++s) { + if (last_row[s] >= 0) { + data[s] = last_row[s]; } } } } -void llm_graph_input_s_copy::set_input(const llama_ubatch * ubatch) { +void llm_graph_input_rs::set_input(const llama_ubatch * ubatch) { GGML_UNUSED(ubatch); - const int64_t n_kv = kv_state->get_n_kv(); + const int64_t n_rs = mctx->get_n_rs(); if (s_copy) { GGML_ASSERT(ggml_backend_buffer_is_host(s_copy->buffer)); int32_t * data = (int32_t *) s_copy->data; // assuming copy destinations ALWAYS happen ONLY on the cells between head and head+n - for (uint32_t i = 0; i < n_kv; ++i) { - data[i] = kv_state->s_copy(i); + for (uint32_t i = 0; i < n_rs; ++i) { + data[i] = mctx->s_copy(i); } } } @@ -265,144 +270,222 @@ void llm_graph_input_cross_embd::set_input(const llama_ubatch * ubatch) { } void llm_graph_input_attn_no_cache::set_input(const llama_ubatch * ubatch) { - if (kq_mask) { - if (cparams.causal_attn) { - const int64_t n_kv = ubatch->n_tokens; - const int64_t n_tokens = ubatch->n_tokens; - const int64_t n_seq_tokens = ubatch->n_seq_tokens; - const int64_t n_seqs = ubatch->n_seqs; - - GGML_ASSERT(ggml_backend_buffer_is_host(kq_mask->buffer)); - float * data = (float *) kq_mask->data; - - for (int h = 0; h < 1; ++h) { - for (int s1 = 0; s1 < n_seqs; ++s1) { - const llama_seq_id seq_id = ubatch->seq_id[s1][0]; - - for (int j = 0; j < n_seq_tokens; ++j) { - const int32_t tj = s1*n_seq_tokens + j; - - for (int s0 = 0; s0 < n_seqs; ++s0) { - for (int i = 0; i < n_seq_tokens; ++i) { - const int32_t ti = s0*n_seq_tokens + i; - float f = -INFINITY; - - // TODO: fix indexing [UBATCH_IDX] - for (int s = 0; s < ubatch->n_seq_id[s0]; ++s) { - if (ubatch->seq_id[s0][s] == seq_id && ubatch->pos[ti] <= ubatch->pos[tj]) { - if (hparams.use_alibi) { - f = 
-std::abs(ubatch->pos[ti] - ubatch->pos[tj]); - } else { - f = 0.0f; - } - break; - } - } - - data[h*(n_kv*n_tokens) + tj*n_kv + ti] = f; - } - } - } - } - } - } else { - const int64_t n_tokens = ubatch->n_tokens; - const int64_t n_seq_tokens = ubatch->n_seq_tokens; - const int64_t n_seqs = ubatch->n_seqs; - const int64_t n_stride = ubatch->n_tokens; - - GGML_ASSERT(ggml_backend_buffer_is_host(kq_mask->buffer)); - - float * data = (float *) kq_mask->data; - - for (int h = 0; h < 1; ++h) { - for (int s1 = 0; s1 < n_seqs; ++s1) { - const llama_seq_id seq_id = ubatch->seq_id[s1][0]; - - for (int j = 0; j < n_seq_tokens; ++j) { - const int32_t tj = s1*n_seq_tokens + j; - - for (int s0 = 0; s0 < n_seqs; ++s0) { - for (int i = 0; i < n_seq_tokens; ++i) { - const int32_t ti = s0*n_seq_tokens + i; - float f = -INFINITY; - - // TODO: fix indexing [UBATCH_IDX] - for (int s = 0; s < ubatch->n_seq_id[s0]; ++s) { - if (ubatch->seq_id[s0][s] == seq_id) { - if (hparams.use_alibi) { - f = -std::abs(ubatch->pos[ti] - ubatch->pos[tj]); - } else { - f = 0.0f; - } - break; - } - } - - data[h*(n_tokens*n_tokens) + tj*n_stride + ti] = f; - } - } + const int64_t n_kv = ubatch->n_tokens; + const int64_t n_tokens = ubatch->n_tokens; + + GGML_ASSERT(kq_mask); + GGML_ASSERT(ggml_backend_buffer_is_host(kq_mask->buffer)); + + float * data = (float *) kq_mask->data; + + for (int h = 0; h < 1; ++h) { + for (int i1 = 0; i1 < n_tokens; ++i1) { + const llama_seq_id s1 = ubatch->seq_id[i1][0]; + + for (int i0 = 0; i0 < n_tokens; ++i0) { + float f = -INFINITY; + + for (int s = 0; s < ubatch->n_seq_id[i0]; ++s) { + const llama_seq_id s0 = ubatch->seq_id[i0][0]; - for (int i = n_tokens; i < n_stride; ++i) { - data[h*(n_tokens*n_tokens) + tj*n_stride + i] = -INFINITY; + // TODO: reimplement this like in llama_kv_cache_unified + if (s0 == s1 && (!cparams.causal_attn || ubatch->pos[i0] <= ubatch->pos[i1])) { + if (hparams.use_alibi) { + f = -std::abs(ubatch->pos[i0] - ubatch->pos[i1]); + } else { + f = 
0.0f; } + break; } } + + data[h*(n_kv*n_tokens) + i1*n_kv + i0] = f; } } } } void llm_graph_input_attn_kv_unified::set_input(const llama_ubatch * ubatch) { - if (self_kq_mask) { - kv_state->set_input_kq_mask(self_kq_mask, ubatch, cparams.causal_attn); - } + mctx->set_input_k_idxs(self_k_idxs, ubatch); + mctx->set_input_v_idxs(self_v_idxs, ubatch); + + mctx->set_input_kq_mask(self_kq_mask, ubatch, cparams.causal_attn); +} + +bool llm_graph_input_attn_kv_unified::can_reuse(const llm_graph_params & params) { + const auto * mctx = static_cast(params.mctx); + + this->mctx = mctx; + + bool res = true; + + res &= self_k_idxs->ne[0] == params.ubatch.n_tokens; + //res &= self_v_idxs->ne[0] == params.ubatch.n_tokens; // TODO: need to move this to the unified cache and check there + + res &= self_kq_mask->ne[0] == mctx->get_n_kv(); + res &= self_kq_mask->ne[1] == GGML_PAD(params.ubatch.n_tokens, GGML_KQ_MASK_PAD); + + res &= mctx->get_supports_set_rows(); // TODO: tmp + + return res; } void llm_graph_input_attn_kv_unified_iswa::set_input(const llama_ubatch * ubatch) { - if (self_kq_mask) { - kv_state->get_base()->set_input_kq_mask(self_kq_mask, ubatch, cparams.causal_attn); - } + mctx->get_base()->set_input_k_idxs(self_k_idxs, ubatch); + mctx->get_base()->set_input_v_idxs(self_v_idxs, ubatch); - if (self_kq_mask_swa) { - kv_state->get_swa()->set_input_kq_mask(self_kq_mask_swa, ubatch, cparams.causal_attn); - } + mctx->get_base()->set_input_kq_mask(self_kq_mask, ubatch, cparams.causal_attn); + + mctx->get_swa()->set_input_k_idxs(self_k_idxs_swa, ubatch); + mctx->get_swa()->set_input_v_idxs(self_v_idxs_swa, ubatch); + + mctx->get_swa()->set_input_kq_mask(self_kq_mask_swa, ubatch, cparams.causal_attn); +} + +bool llm_graph_input_attn_kv_unified_iswa::can_reuse(const llm_graph_params & params) { + const auto * mctx = static_cast(params.mctx); + + this->mctx = mctx; + + bool res = true; + + res &= self_k_idxs->ne[0] == params.ubatch.n_tokens; + //res &= self_v_idxs->ne[0] == 
params.ubatch.n_tokens; // TODO: need to move this to the unified cache and check there + + res &= self_k_idxs_swa->ne[0] == params.ubatch.n_tokens; + //res &= self_v_idxs_swa->ne[0] == params.ubatch.n_tokens; // TODO: need to move this to the unified cache and check there + + res &= self_kq_mask->ne[0] == mctx->get_base()->get_n_kv(); + res &= self_kq_mask->ne[1] == GGML_PAD(params.ubatch.n_tokens, GGML_KQ_MASK_PAD); + + res &= self_kq_mask_swa->ne[0] == mctx->get_swa()->get_n_kv(); + res &= self_kq_mask_swa->ne[1] == GGML_PAD(params.ubatch.n_tokens, GGML_KQ_MASK_PAD); + + res &= mctx->get_base()->get_supports_set_rows(); // TODO: tmp + + return res; } void llm_graph_input_attn_cross::set_input(const llama_ubatch * ubatch) { - if (cross_kq_mask) { - const int64_t n_enc = cross_kq_mask->ne[0]; - const int64_t n_tokens = ubatch->n_tokens; + GGML_ASSERT(cross_kq_mask); - GGML_ASSERT(ggml_backend_buffer_is_host(cross_kq_mask->buffer)); - GGML_ASSERT(!ubatch->equal_seqs); // TODO: use ubatch->n_seqs instead of failing + const int64_t n_enc = cross_kq_mask->ne[0]; + const int64_t n_tokens = ubatch->n_tokens; - float * data = (float *) cross_kq_mask->data; + GGML_ASSERT(ggml_backend_buffer_is_host(cross_kq_mask->buffer)); + GGML_ASSERT(!ubatch->equal_seqs()); // TODO: use ubatch->n_seqs instead of failing - for (int h = 0; h < 1; ++h) { - for (int j = 0; j < n_tokens; ++j) { - for (int i = 0; i < n_enc; ++i) { - float f = -INFINITY; - // TODO: fix indexing [UBATCH_IDX] - for (int s = 0; s < ubatch->n_seq_id[j]; ++s) { - const llama_seq_id seq_id = ubatch->seq_id[j][s]; - if (cross->seq_ids_enc[i].find(seq_id) != cross->seq_ids_enc[i].end()) { - f = 0.0f; - } + float * data = (float *) cross_kq_mask->data; + + for (int h = 0; h < 1; ++h) { + for (int i = 0; i < n_tokens; ++i) { + for (int j = 0; j < n_enc; ++j) { + float f = -INFINITY; + + for (int s = 0; s < ubatch->n_seq_id[i]; ++s) { + const llama_seq_id seq_id = ubatch->seq_id[i][s]; + + if 
(cross->seq_ids_enc[j].find(seq_id) != cross->seq_ids_enc[j].end()) { + f = 0.0f; } - data[h*(n_enc*n_tokens) + j*n_enc + i] = f; } + + data[h*(n_enc*n_tokens) + i*n_enc + j] = f; } + } - for (int i = n_tokens; i < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++i) { - for (int j = 0; j < n_enc; ++j) { - data[h*(n_enc*n_tokens) + i*n_enc + j] = -INFINITY; - } + for (int i = n_tokens; i < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++i) { + for (int j = 0; j < n_enc; ++j) { + data[h*(n_enc*n_tokens) + i*n_enc + j] = -INFINITY; } } } } +void llm_graph_input_mem_hybrid::set_input(const llama_ubatch * ubatch) { + inp_attn->set_input(ubatch); + inp_rs->set_input(ubatch); +} + +// +// llm_graph_result +// + +llm_graph_result::llm_graph_result(int64_t max_nodes) : max_nodes(max_nodes) { + reset(); + + const char * LLAMA_GRAPH_RESULT_DEBUG = getenv("LLAMA_GRAPH_RESULT_DEBUG"); + debug = LLAMA_GRAPH_RESULT_DEBUG ? atoi(LLAMA_GRAPH_RESULT_DEBUG) : 0; +} + +int64_t llm_graph_result::get_max_nodes() const { + return max_nodes; +} + +void llm_graph_result::reset() { + t_tokens = nullptr; + t_logits = nullptr; + t_embd = nullptr; + t_embd_pooled = nullptr; + + inputs.clear(); + + buf_compute_meta.resize(ggml_tensor_overhead()*max_nodes + ggml_graph_overhead_custom(max_nodes, false)); + + ggml_init_params params = { + /*.mem_size =*/ buf_compute_meta.size(), + /*.mem_buffer =*/ buf_compute_meta.data(), + /*.no_alloc =*/ true, + }; + + ctx_compute.reset(ggml_init(params)); + + gf = ggml_new_graph_custom(ctx_compute.get(), max_nodes, false); +} + +void llm_graph_result::set_inputs(const llama_ubatch * ubatch) { + for (auto & input : inputs) { + input->set_input(ubatch); + } +} + +bool llm_graph_result::can_reuse(const llm_graph_params & params) { + if (!this->params.allow_reuse(params)) { + if (debug > 1) { + LLAMA_LOG_DEBUG("%s: cannot reuse graph due to incompatible graph parameters\n", __func__); + } + + return false; + } + + if (debug > 1) { + LLAMA_LOG_DEBUG("%s: checking compatibility of 
%d inputs:\n", __func__, (int) inputs.size()); + } + + bool res = true; + + for (auto & input : inputs) { + const bool cur = input->can_reuse(params); + + if (debug > 1) { + LLAMA_LOG_DEBUG("%s: can_reuse = %d\n", "placeholder", cur); + } + + res = res && cur; + } + + if (debug > 0) { + LLAMA_LOG_DEBUG("%s: can reuse graph = %d\n", __func__, res); + } + + return res; +} + +llm_graph_input_i * llm_graph_result::add_input(llm_graph_input_ptr input) { + inputs.emplace_back(std::move(input)); + return inputs.back().get(); +} + // // llm_graph_context // @@ -437,21 +520,18 @@ llm_graph_context::llm_graph_context(const llm_graph_params & params) : n_ctx_orig (cparams.n_ctx_orig_yarn), pooling_type (cparams.pooling_type), rope_type (hparams.rope_type), - ctx0 (params.ctx), sched (params.sched), backend_cpu (params.backend_cpu), cvec (params.cvec), loras (params.loras), - mstate (params.mstate), + mctx (params.mctx), cross (params.cross), cb_func (params.cb), - res (std::make_unique()) { + res (static_cast(params.res)), + ctx0 (res->get_ctx()) { + res->params = params; } -int64_t llm_graph_context::n_pos_per_embd() const { - return hparams.rope_type == LLAMA_ROPE_TYPE_MROPE ? 
4 : 1; -} - void llm_graph_context::cb(ggml_tensor * cur, const char * name, int il) const { if (cb_func) { cb_func(ubatch, cur, name, il); @@ -611,12 +691,20 @@ ggml_tensor * llm_graph_context::build_ffn( switch (type_op) { case LLM_FFN_SILU: - { + if (gate && type_gate == LLM_FFN_PAR) { + cur = ggml_swiglu_split(ctx0, cur, tmp); + cb(cur, "ffn_swiglu", il); + type_gate = LLM_FFN_SEQ; + } else { cur = ggml_silu(ctx0, cur); cb(cur, "ffn_silu", il); } break; case LLM_FFN_GELU: - { + if (gate && type_gate == LLM_FFN_PAR) { + cur = ggml_geglu_split(ctx0, cur, tmp); + cb(cur, "ffn_geglu", il); + type_gate = LLM_FFN_SEQ; + } else { cur = ggml_gelu(ctx0, cur); cb(cur, "ffn_gelu", il); if (act_scales != NULL) { @@ -625,7 +713,11 @@ ggml_tensor * llm_graph_context::build_ffn( } } break; case LLM_FFN_RELU: - { + if (gate && type_gate == LLM_FFN_PAR) { + cur = ggml_reglu_split(ctx0, cur, tmp); + cb(cur, "ffn_reglu", il); + type_gate = LLM_FFN_SEQ; + } else { cur = ggml_relu(ctx0, cur); cb(cur, "ffn_relu", il); } break; @@ -639,32 +731,19 @@ ggml_tensor * llm_graph_context::build_ffn( } break; case LLM_FFN_SWIGLU: { - // Project to 4h. 
If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf - int64_t split_point = cur->ne[0] / 2; - // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217 - ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0)); - ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur))); - - x0 = ggml_silu(ctx0, x0); - cb(cur, "ffn_silu", il); - - cur = ggml_mul(ctx0, x0, x1); - cb(cur, "ffn_mul", il); + cur = ggml_swiglu(ctx0, cur); + cb(cur, "ffn_swiglu", il); } break; case LLM_FFN_GEGLU: { - // Split into two equal parts - int64_t split_point = cur->ne[0] / 2; - // TODO: these conts should not be needed, see https://github.com/ggml-org/llama.cpp/pull/14090#discussion_r2137437217 - ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0)); - ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur))); - - x0 = ggml_gelu(ctx0, x0); - cb(x0, "ffn_gelu", il); - - cur = ggml_mul(ctx0, x0, x1); + cur = ggml_geglu(ctx0, cur); cb(cur, "ffn_geglu", il); } break; + case LLM_FFN_REGLU: + { + cur = ggml_reglu(ctx0, cur); + cb(cur, "ffn_reglu", il); + } break; } if (gate && type_gate == LLM_FFN_PAR) { @@ -794,12 +873,18 @@ ggml_tensor * llm_graph_context::build_moe_ffn( switch (type_op) { case LLM_FFN_SILU: - { + if (gate_exps) { + cur = ggml_swiglu_split(ctx0, cur, up); + cb(cur, "ffn_moe_swiglu", il); + } else { cur = ggml_silu(ctx0, cur); cb(cur, "ffn_moe_silu", il); } break; case LLM_FFN_GELU: - { + if (gate_exps) { + cur = ggml_geglu_split(ctx0, cur, up); + cb(cur, "ffn_moe_geglu", il); + } else { cur = ggml_gelu(ctx0, cur); cb(cur, "ffn_moe_gelu", il); } break; @@ -807,11 +892,6 @@ ggml_tensor * llm_graph_context::build_moe_ffn( GGML_ABORT("fatal error"); } - if 
(gate_exps) { - cur = ggml_mul(ctx0, cur, up); // [n_ff, n_expert_used, n_tokens] - cb(cur, "ffn_moe_gate_par", il); - } - experts = build_lora_mm_id(down_exps, cur, selected_experts); // [n_embd, n_expert_used, n_tokens] cb(experts, "ffn_moe_down", il); @@ -896,11 +976,11 @@ ggml_tensor * llm_graph_context::build_inp_embd(ggml_tensor * tok_embd) const { } ggml_tensor * llm_graph_context::build_inp_pos() const { - auto inp = std::make_unique(n_pos_per_embd()); + auto inp = std::make_unique(hparams.n_pos_per_embd()); auto & cur = inp->pos; - cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens*n_pos_per_embd()); + cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, (int64_t)n_tokens*hparams.n_pos_per_embd()); ggml_set_input(cur); res->add_input(std::move(inp)); @@ -923,6 +1003,14 @@ ggml_tensor * llm_graph_context::build_inp_attn_scale() const { } ggml_tensor * llm_graph_context::build_inp_out_ids() const { + // note: when all tokens are output, we could skip this optimization to spare the ggml_get_rows() calls, + // but this would make the graph topology depend on the number of output tokens, which can interere with + // features that require constant topology such as pipline parallelism + // ref: https://github.com/ggml-org/llama.cpp/pull/14275#issuecomment-2987424471 + //if (n_outputs < n_tokens) { + // return nullptr; + //} + auto inp = std::make_unique(hparams, cparams, n_outputs); auto & cur = inp->out_ids; @@ -940,7 +1028,7 @@ ggml_tensor * llm_graph_context::build_inp_mean() const { auto & cur = inp->mean; - cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, n_tokens); + cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, ubatch.n_seqs_unq); ggml_set_input(cur); res->add_input(std::move(inp)); @@ -953,24 +1041,7 @@ ggml_tensor * llm_graph_context::build_inp_cls() const { auto & cur = inp->cls; - cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_tokens); - ggml_set_input(cur); - - res->add_input(std::move(inp)); - - return cur; -} - -ggml_tensor * 
llm_graph_context::build_inp_s_copy() const { - const auto * kv_state = static_cast(mstate); - - auto inp = std::make_unique(kv_state); - - const auto n_kv = kv_state->get_n_kv(); - - auto & cur = inp->s_copy; - - cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_kv); + cur = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ubatch.n_seqs_unq); ggml_set_input(cur); res->add_input(std::move(inp)); @@ -1016,11 +1087,11 @@ ggml_tensor * llm_graph_context::build_inp_pos_bucket_enc() const { } ggml_tensor * llm_graph_context::build_inp_pos_bucket_dec() const { - const auto * kv_state = static_cast(mstate); + const auto * mctx_cur = static_cast(mctx); - auto inp = std::make_unique(hparams, kv_state); + auto inp = std::make_unique(hparams, mctx_cur); - const auto n_kv = kv_state->get_n_kv(); + const auto n_kv = mctx_cur->get_n_kv(); auto & cur = inp->pos_bucket; @@ -1058,13 +1129,16 @@ ggml_tensor * llm_graph_context::build_attn_mha( float kq_scale) const { const bool v_trans = v->nb[1] > v->nb[2]; + // split the batch into streams if needed + const auto n_stream = k->ne[3]; + + q = ggml_reshape_4d(ctx0, q, q->ne[0], q->ne[1], q->ne[2]/n_stream, n_stream); + q = ggml_permute(ctx0, q, 0, 2, 1, 3); k = ggml_permute(ctx0, k, 0, 2, 1, 3); v = ggml_permute(ctx0, v, 0, 2, 1, 3); - const auto n_tokens = q->ne[1]; - const auto n_head = q->ne[2]; - const auto n_kv = k->ne[1]; + const auto n_kv = k->ne[1]; ggml_tensor * cur; @@ -1106,7 +1180,7 @@ ggml_tensor * llm_graph_context::build_attn_mha( #endif } - cur = ggml_reshape_2d(ctx0, cur, cur->ne[0]*n_head, n_tokens); + cur = ggml_reshape_2d(ctx0, cur, cur->ne[0]*cur->ne[1], cur->ne[2]*cur->ne[3]); } else { ggml_tensor * kq = ggml_mul_mat(ctx0, k, q); @@ -1151,7 +1225,8 @@ ggml_tensor * llm_graph_context::build_attn_mha( cur = ggml_permute(ctx0, kqv, 0, 2, 1, 3); - cur = ggml_cont_2d(ctx0, cur, cur->ne[0]*n_head, n_tokens); + // recombine streams + cur = ggml_cont_2d(ctx0, cur, cur->ne[0]*cur->ne[1], cur->ne[2]*cur->ne[3]); if 
(!cparams.offload_kqv) { // all nodes between the KV store and the attention output are run on the CPU @@ -1168,8 +1243,7 @@ llm_graph_input_attn_no_cache * llm_graph_context::build_attn_inp_no_cache() con auto inp = std::make_unique(hparams, cparams); // note: there is no KV cache, so the number of KV values is equal to the number of tokens in the batch - inp->kq_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_tokens, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD)); - //cb(inp_kq_mask, "KQ_mask", -1); + inp->kq_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_tokens, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD), 1, 1); ggml_set_input(inp->kq_mask); inp->kq_mask_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->kq_mask, GGML_TYPE_F16) : inp->kq_mask; @@ -1199,6 +1273,10 @@ ggml_tensor * llm_graph_context::build_attn( const auto & kq_mask = inp->get_kq_mask(); + // [TAG_NO_CACHE_PAD] + // TODO: if ubatch.equal_seqs() == true, we can split the three tensors below into ubatch.n_seqs_unq streams + assert(!ubatch.equal_seqs()); + ggml_tensor * q = q_cur; ggml_tensor * k = k_cur; ggml_tensor * v = v_cur; @@ -1221,23 +1299,39 @@ ggml_tensor * llm_graph_context::build_attn( return cur; } -llm_graph_input_attn_kv_unified * llm_graph_context::build_attn_inp_kv_unified() const { - const auto * kv_state = static_cast(mstate); +static std::unique_ptr build_attn_inp_kv_unified_impl( + ggml_context * ctx0, + const llama_ubatch & ubatch, + const llama_hparams & hparams, + const llama_cparams & cparams, + const llama_kv_cache_unified_context * mctx_cur) { - auto inp = std::make_unique(hparams, cparams, kv_state); + auto inp = std::make_unique(hparams, cparams, mctx_cur); { GGML_ASSERT(hparams.swa_type == LLAMA_SWA_TYPE_NONE && "Use llama_kv_cache_unified_iswa for SWA"); - const auto n_kv = kv_state->get_n_kv(); + const auto n_kv = mctx_cur->get_n_kv(); + const auto n_tokens = ubatch.n_tokens; + const auto n_stream = cparams.kv_unified ? 
1 : ubatch.n_seqs_unq; + + inp->self_k_idxs = mctx_cur->build_input_k_idxs(ctx0, ubatch); + inp->self_v_idxs = mctx_cur->build_input_v_idxs(ctx0, ubatch); - inp->self_kq_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD)); - //cb(inp->self_kq_mask, "KQ_mask", -1); + inp->self_kq_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens/n_stream, GGML_KQ_MASK_PAD), 1, n_stream); ggml_set_input(inp->self_kq_mask); inp->self_kq_mask_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask, GGML_TYPE_F16) : inp->self_kq_mask; } + return inp; +} + +llm_graph_input_attn_kv_unified * llm_graph_context::build_attn_inp_kv_unified() const { + const auto * mctx_cur = static_cast(mctx); + + auto inp = build_attn_inp_kv_unified_impl(ctx0, ubatch, hparams, cparams, mctx_cur); + return (llm_graph_input_attn_kv_unified *) res->add_input(std::move(inp)); } @@ -1259,19 +1353,22 @@ ggml_tensor * llm_graph_context::build_attn( ggml_build_forward_expand(gf, k_cur); ggml_build_forward_expand(gf, v_cur); - const auto * kv_state = static_cast(mstate); + const auto * mctx_cur = inp->mctx; // store to KV cache { - ggml_build_forward_expand(gf, kv_state->cpy_k(ctx0, k_cur, il)); - ggml_build_forward_expand(gf, kv_state->cpy_v(ctx0, v_cur, il)); + const auto & k_idxs = inp->get_k_idxs(); + const auto & v_idxs = inp->get_v_idxs(); + + ggml_build_forward_expand(gf, mctx_cur->cpy_k(ctx0, k_cur, k_idxs, il)); + ggml_build_forward_expand(gf, mctx_cur->cpy_v(ctx0, v_cur, v_idxs, il)); } const auto & kq_mask = inp->get_kq_mask(); ggml_tensor * q = q_cur; - ggml_tensor * k = kv_state->get_k(ctx0, il); - ggml_tensor * v = kv_state->get_v(ctx0, il); + ggml_tensor * k = mctx_cur->get_k(ctx0, il); + ggml_tensor * v = mctx_cur->get_v(ctx0, il); ggml_tensor * cur = build_attn_mha(gf, q, k, v, kq_b, kq_mask, v_mla, kq_scale); cb(cur, "kqv_out", il); @@ -1291,36 +1388,6 @@ ggml_tensor * llm_graph_context::build_attn( return cur; } 
-llm_graph_input_attn_kv_unified_iswa * llm_graph_context::build_attn_inp_kv_unified_iswa() const { - const auto * kv_state = static_cast(mstate); - - auto inp = std::make_unique(hparams, cparams, kv_state); - - { - const auto n_kv = kv_state->get_base()->get_n_kv(); - - inp->self_kq_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD)); - //cb(inp->self_kq_mask, "KQ_mask", -1); - ggml_set_input(inp->self_kq_mask); - - inp->self_kq_mask_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask, GGML_TYPE_F16) : inp->self_kq_mask; - } - - { - GGML_ASSERT(hparams.swa_type != LLAMA_SWA_TYPE_NONE && "Use llama_kv_cache_unified for non-SWA"); - - const auto n_kv = kv_state->get_swa()->get_n_kv(); - - inp->self_kq_mask_swa = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD)); - //cb(inp->self_kq_mask_swa, "KQ_mask_swa", -1); - ggml_set_input(inp->self_kq_mask_swa); - - inp->self_kq_mask_swa_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask_swa, GGML_TYPE_F16) : inp->self_kq_mask_swa; - } - - return (llm_graph_input_attn_kv_unified_iswa *) res->add_input(std::move(inp)); -} - ggml_tensor * llm_graph_context::build_attn( llm_graph_input_attn_kv_unified_iswa * inp, ggml_cgraph * gf, @@ -1336,26 +1403,39 @@ ggml_tensor * llm_graph_context::build_attn( // these nodes are added to the graph together so that they are not reordered // by doing so, the number of splits in the graph is reduced ggml_build_forward_expand(gf, q_cur); - ggml_build_forward_expand(gf, k_cur); - ggml_build_forward_expand(gf, v_cur); - const auto * kv_state_iswa = static_cast(mstate); + if (k_cur) { + ggml_build_forward_expand(gf, k_cur); + } + + if (v_cur) { + ggml_build_forward_expand(gf, v_cur); + } + + const auto * mctx_iswa = inp->mctx; const bool is_swa = hparams.is_swa(il); - const auto * kv_state = is_swa ? kv_state_iswa->get_swa() : kv_state_iswa->get_base(); + const auto * mctx_cur = is_swa ? 
mctx_iswa->get_swa() : mctx_iswa->get_base(); - // store to KV cache - { - ggml_build_forward_expand(gf, kv_state->cpy_k(ctx0, k_cur, il)); - ggml_build_forward_expand(gf, kv_state->cpy_v(ctx0, v_cur, il)); + // optionally store to KV cache + if (k_cur) { + const auto & k_idxs = is_swa ? inp->get_k_idxs_swa() : inp->get_k_idxs(); + + ggml_build_forward_expand(gf, mctx_cur->cpy_k(ctx0, k_cur, k_idxs, il)); + } + + if (v_cur) { + const auto & v_idxs = is_swa ? inp->get_v_idxs_swa() : inp->get_v_idxs(); + + ggml_build_forward_expand(gf, mctx_cur->cpy_v(ctx0, v_cur, v_idxs, il)); } const auto & kq_mask = is_swa ? inp->get_kq_mask_swa() : inp->get_kq_mask(); ggml_tensor * q = q_cur; - ggml_tensor * k = kv_state->get_k(ctx0, il); - ggml_tensor * v = kv_state->get_v(ctx0, il); + ggml_tensor * k = mctx_cur->get_k(ctx0, il); + ggml_tensor * v = mctx_cur->get_v(ctx0, il); ggml_tensor * cur = build_attn_mha(gf, q, k, v, kq_b, kq_mask, v_mla, kq_scale); cb(cur, "kqv_out", il); @@ -1380,7 +1460,7 @@ llm_graph_input_attn_cross * llm_graph_context::build_attn_inp_cross() const { const int32_t n_enc = !cross->v_embd.empty() ? cross->n_enc : hparams.n_ctx_train; - inp->cross_kq_mask = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_enc, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD)); + inp->cross_kq_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_enc, GGML_PAD(n_tokens, GGML_KQ_MASK_PAD), 1, 1); ggml_set_input(inp->cross_kq_mask); inp->cross_kq_mask_cnv = cparams.flash_attn ? 
ggml_cast(ctx0, inp->cross_kq_mask, GGML_TYPE_F16) : inp->cross_kq_mask; @@ -1430,39 +1510,69 @@ ggml_tensor * llm_graph_context::build_attn( return cur; } -ggml_tensor * llm_graph_context::build_recurrent_state( - ggml_cgraph * gf, - ggml_tensor * s, - ggml_tensor * state_copy, - int32_t state_size, - int32_t n_seqs, - bool avoid_copies) const { - const auto * kv_state = static_cast(mstate); +// TODO: maybe separate the inner implementation into a separate function +// like with the non-sliding window equivalent +// once sliding-window hybrid caches are a thing. +llm_graph_input_attn_kv_unified_iswa * llm_graph_context::build_attn_inp_kv_unified_iswa() const { + const auto * mctx_cur = static_cast(mctx); - const auto n_kv = kv_state->get_n_kv(); - const auto kv_head = kv_state->get_head(); - const auto rs_zero = kv_state->get_rs_z(); + auto inp = std::make_unique(hparams, cparams, mctx_cur); - ggml_tensor * states = ggml_reshape_2d(ctx0, s, state_size, kv_state->get_size()); + const auto n_stream = cparams.kv_unified ? 1 : ubatch.n_seqs_unq; + + { + const auto n_kv = mctx_cur->get_base()->get_n_kv(); + + inp->self_k_idxs = mctx_cur->get_base()->build_input_k_idxs(ctx0, ubatch); + inp->self_v_idxs = mctx_cur->get_base()->build_input_v_idxs(ctx0, ubatch); + + inp->self_kq_mask = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens/n_stream, GGML_KQ_MASK_PAD), 1, n_stream); + ggml_set_input(inp->self_kq_mask); + + inp->self_kq_mask_cnv = cparams.flash_attn ? 
ggml_cast(ctx0, inp->self_kq_mask, GGML_TYPE_F16) : inp->self_kq_mask; + } + + { + GGML_ASSERT(hparams.swa_type != LLAMA_SWA_TYPE_NONE && "Use llama_kv_cache_unified for non-SWA"); + + const auto n_kv = mctx_cur->get_swa()->get_n_kv(); + + inp->self_k_idxs_swa = mctx_cur->get_swa()->build_input_k_idxs(ctx0, ubatch); + inp->self_v_idxs_swa = mctx_cur->get_swa()->build_input_v_idxs(ctx0, ubatch); + + inp->self_kq_mask_swa = ggml_new_tensor_4d(ctx0, GGML_TYPE_F32, n_kv, GGML_PAD(n_tokens/n_stream, GGML_KQ_MASK_PAD), 1, n_stream); + ggml_set_input(inp->self_kq_mask_swa); + + inp->self_kq_mask_swa_cnv = cparams.flash_attn ? ggml_cast(ctx0, inp->self_kq_mask_swa, GGML_TYPE_F16) : inp->self_kq_mask_swa; + } + + return (llm_graph_input_attn_kv_unified_iswa *) res->add_input(std::move(inp)); +} + +ggml_tensor * llm_graph_context::build_rs( + ggml_cgraph * gf, + ggml_tensor * s, + ggml_tensor * state_copy, + int32_t state_size, + int32_t n_seqs, + uint32_t n_kv, + uint32_t kv_head, + uint32_t kv_size, + int32_t rs_zero, + const llm_graph_get_rows_fn & get_state_rows) const { + + ggml_tensor * states = ggml_reshape_2d(ctx0, s, state_size, kv_size); // Clear a single state which will then be copied to the other cleared states. // Note that this is a no-op when the view is zero-sized. 
ggml_tensor * state_zero = ggml_view_1d(ctx0, states, state_size*(rs_zero >= 0), rs_zero*states->nb[1]*(rs_zero >= 0)); ggml_build_forward_expand(gf, ggml_scale_inplace(ctx0, state_zero, 0)); - ggml_tensor * output_states; - - if (!avoid_copies) { - // copy states - // NOTE: assuming the copy destinations are ALL contained between kv_head and kv_head + n_kv - // {state_size, kv_size} -> {state_size, n_seqs} - output_states = ggml_get_rows(ctx0, states, ggml_view_1d(ctx0, state_copy, n_seqs, 0)); - ggml_build_forward_expand(gf, output_states); - } else { - // FIXME: make the gathering operation happen before the copy below - // (maybe with an optional lambda function passed as a parameter instead of `avoid_copies`?) - output_states = states; - } + // copy states + // NOTE: assuming the copy destinations are ALL contained between kv_head and kv_head + n_kv + // {state_size, kv_size} -> {state_size, n_seqs} + ggml_tensor * output_states = get_state_rows(ctx0, states, ggml_view_1d(ctx0, state_copy, n_seqs, 0)); + ggml_build_forward_expand(gf, output_states); // copy extra states which won't be changed further (between n_seqs and n_kv) ggml_tensor * states_extra = ggml_get_rows(ctx0, states, ggml_view_1d(ctx0, state_copy, n_kv - n_seqs, n_seqs*state_copy->nb[0])); @@ -1474,22 +1584,56 @@ ggml_tensor * llm_graph_context::build_recurrent_state( return output_states; } +static std::unique_ptr build_rs_inp_impl( + ggml_context * ctx0, + const llama_memory_recurrent_context * mctx_cur) { + + auto inp = std::make_unique(mctx_cur); + + const auto n_rs = mctx_cur->get_n_rs(); + + inp->s_copy = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, n_rs); + ggml_set_input(inp->s_copy); + + return inp; +} + +llm_graph_input_rs * llm_graph_context::build_rs_inp() const { + const auto * mctx_cur = static_cast(mctx); + + auto inp = build_rs_inp_impl(ctx0, mctx_cur); + + return (llm_graph_input_rs *) res->add_input(std::move(inp)); +} + +ggml_tensor * llm_graph_context::build_rs( + 
llm_graph_input_rs * inp, + ggml_cgraph * gf, + ggml_tensor * s, + int32_t state_size, + int32_t n_seqs, + const llm_graph_get_rows_fn & get_state_rows) const { + const auto * kv_state = inp->mctx; + + return build_rs(gf, s, inp->s_copy, state_size, n_seqs, kv_state->get_n_rs(), kv_state->get_head(), kv_state->get_size(), kv_state->get_rs_z(), get_state_rows); +} + ggml_tensor * llm_graph_context::build_rwkv_token_shift_load( - ggml_cgraph * gf, - ggml_tensor * state_copy, - const llama_ubatch & ubatch, + llm_graph_input_rs * inp, + ggml_cgraph * gf, + const llama_ubatch & ubatch, int il) const { - const auto * kv_state = static_cast(mstate); + const auto * mctx_cur = static_cast(mctx); const auto token_shift_count = hparams.token_shift_count; const int64_t n_seqs = ubatch.n_seqs; - ggml_tensor * token_shift_all = kv_state->get_k_l(il); + ggml_tensor * token_shift_all = mctx_cur->get_r_l(il); - ggml_tensor * token_shift = build_recurrent_state( - gf, token_shift_all, state_copy, - hparams.n_embd_k_s(), n_seqs); + ggml_tensor * token_shift = build_rs( + inp, gf, token_shift_all, + hparams.n_embd_r(), n_seqs); token_shift = ggml_reshape_3d(ctx0, token_shift, hparams.n_embd, token_shift_count, n_seqs); @@ -1500,22 +1644,33 @@ ggml_tensor * llm_graph_context::build_rwkv_token_shift_store( ggml_tensor * token_shift, const llama_ubatch & ubatch, int il) const { - const auto * kv_state = static_cast(mstate); + const auto * mctx_cur = static_cast(mctx); const auto token_shift_count = hparams.token_shift_count; const auto n_embd = hparams.n_embd; const int64_t n_seqs = ubatch.n_seqs; - const auto kv_head = kv_state->get_head(); + const auto kv_head = mctx_cur->get_head(); return ggml_cpy( ctx0, ggml_view_1d(ctx0, token_shift, n_embd * n_seqs * token_shift_count, 0), - ggml_view_1d(ctx0, kv_state->get_k_l(il), hparams.n_embd_k_s()*n_seqs, hparams.n_embd_k_s()*kv_head*ggml_element_size(kv_state->get_k_l(il))) + ggml_view_1d(ctx0, mctx_cur->get_r_l(il), 
hparams.n_embd_r()*n_seqs, hparams.n_embd_r()*kv_head*ggml_element_size(mctx_cur->get_r_l(il))) ); } +llm_graph_input_mem_hybrid * llm_graph_context::build_inp_mem_hybrid() const { + const auto * mctx_cur = static_cast(mctx); + + auto inp_rs = build_rs_inp_impl(ctx0, mctx_cur->get_recr()); + auto inp_attn = build_attn_inp_kv_unified_impl(ctx0, ubatch, hparams, cparams, mctx_cur->get_attn()); + + auto inp = std::make_unique(std::move(inp_attn), std::move(inp_rs), mctx_cur); + + return (llm_graph_input_mem_hybrid *) res->add_input(std::move(inp)); +} + void llm_graph_context::build_pooling( ggml_cgraph * gf, ggml_tensor * cls, diff --git a/src/llama-graph.h b/src/llama-graph.h index 87813119b1a3c..42e636e0e3f6c 100644 --- a/src/llama-graph.h +++ b/src/llama-graph.h @@ -1,6 +1,7 @@ #pragma once #include "llama-arch.h" +#include "llama-batch.h" #include "llama-hparams.h" #include "llama-adapter.h" @@ -14,14 +15,14 @@ struct ggml_cgraph; struct ggml_context; struct ggml_tensor; -struct llama_ubatch; struct llama_cparams; -struct llama_memory_state_i; +struct llama_memory_context_i; -class llama_kv_cache_unified_state; -class llama_kv_cache_unified_iswa_state; -class llama_kv_cache_recurrent_state; +class llama_kv_cache_unified_context; +class llama_kv_cache_unified_iswa_context; +class llama_memory_recurrent_context; +class llama_memory_hybrid_context; // certain models (typically multi-modal) can produce different types of graphs enum llm_graph_type { @@ -37,6 +38,7 @@ enum llm_ffn_op_type { LLM_FFN_RELU_SQR, LLM_FFN_SWIGLU, LLM_FFN_GEGLU, + LLM_FFN_REGLU, }; enum llm_ffn_gate_type { @@ -67,6 +69,8 @@ struct llama_cross { std::vector> seq_ids_enc; }; +struct llm_graph_params; + // // llm_graph_input // @@ -76,11 +80,19 @@ class llm_graph_input_i { virtual ~llm_graph_input_i() = default; virtual void set_input(const llama_ubatch * ubatch) = 0; + + // return true if the resulting input tensors using the provided graph parameters would be + // the same as the previous 
input tensors that we have currently stored in the object + virtual bool can_reuse(const llm_graph_params & params) { + // returning false here by default will prevent from reusing the graph if the check + // for the input type has not been implemented yet + GGML_UNUSED(params); + return false; + } }; using llm_graph_input_ptr = std::unique_ptr; - class llm_graph_input_embd : public llm_graph_input_i { public: llm_graph_input_embd() = default; @@ -88,20 +100,24 @@ class llm_graph_input_embd : public llm_graph_input_i { void set_input(const llama_ubatch * ubatch) override; + bool can_reuse(const llm_graph_params & params) override; + ggml_tensor * tokens = nullptr; // I32 [n_batch] ggml_tensor * embd = nullptr; // F32 [n_embd, n_batch] }; class llm_graph_input_pos : public llm_graph_input_i { public: - llm_graph_input_pos(int64_t n_pos_per_embd) : n_pos_per_embd(n_pos_per_embd) {} + llm_graph_input_pos(uint32_t n_pos_per_embd) : n_pos_per_embd(n_pos_per_embd) {} virtual ~llm_graph_input_pos() = default; void set_input(const llama_ubatch * ubatch) override; + bool can_reuse(const llm_graph_params & params) override; + ggml_tensor * pos = nullptr; // I32 [n_batch] - const int64_t n_pos_per_embd = 1; + const uint32_t n_pos_per_embd = 1; }; // temperature tuning, used by llama4 @@ -135,7 +151,7 @@ class llm_graph_input_pos_bucket_kv : public llm_graph_input_i { public: llm_graph_input_pos_bucket_kv( const llama_hparams & hparams, - const llama_kv_cache_unified_state * kv_state) : hparams(hparams), kv_state(kv_state) {} + const llama_kv_cache_unified_context * mctx) : hparams(hparams), mctx(mctx) {} virtual ~llm_graph_input_pos_bucket_kv() = default; void set_input(const llama_ubatch * ubatch) override; @@ -143,7 +159,8 @@ class llm_graph_input_pos_bucket_kv : public llm_graph_input_i { ggml_tensor * pos_bucket = nullptr; // I32 [n_kv, n_batch] const llama_hparams & hparams; - const llama_kv_cache_unified_state * kv_state; + + const llama_kv_cache_unified_context * mctx; 
}; class llm_graph_input_out_ids : public llm_graph_input_i { @@ -151,17 +168,19 @@ class llm_graph_input_out_ids : public llm_graph_input_i { llm_graph_input_out_ids( const llama_hparams & hparams, const llama_cparams & cparams, - int32_t n_outputs) : hparams(hparams), cparams(cparams), n_outputs(n_outputs) {} + uint32_t n_outputs) : hparams(hparams), cparams(cparams), n_outputs(n_outputs) {} virtual ~llm_graph_input_out_ids() = default; void set_input(const llama_ubatch * ubatch) override; + bool can_reuse(const llm_graph_params & params) override; + ggml_tensor * out_ids; // I32 [n_outputs] const llama_hparams & hparams; const llama_cparams & cparams; - const int32_t n_outputs; + const uint32_t n_outputs; }; class llm_graph_input_mean : public llm_graph_input_i { @@ -188,16 +207,16 @@ class llm_graph_input_cls : public llm_graph_input_i { const llama_cparams & cparams; }; -class llm_graph_input_s_copy : public llm_graph_input_i { +class llm_graph_input_rs : public llm_graph_input_i { public: - llm_graph_input_s_copy(const llama_kv_cache_recurrent_state * kv_state) : kv_state(kv_state) {} - virtual ~llm_graph_input_s_copy() = default; + llm_graph_input_rs(const llama_memory_recurrent_context * mctx) : mctx(mctx) {} + virtual ~llm_graph_input_rs() = default; void set_input(const llama_ubatch * ubatch) override; ggml_tensor * s_copy; // I32 [kv_size] - const llama_kv_cache_recurrent_state * kv_state; + const llama_memory_recurrent_context * mctx; }; class llm_graph_input_cross_embd : public llm_graph_input_i { @@ -225,8 +244,8 @@ class llm_graph_input_attn_no_cache : public llm_graph_input_i { ggml_tensor * get_kq_mask() const { return kq_mask_cnv; } - ggml_tensor * kq_mask = nullptr; // F32 [n_tokens, n_batch] - ggml_tensor * kq_mask_cnv = nullptr; // [n_tokens, n_batch] + ggml_tensor * kq_mask = nullptr; // F32 [n_tokens, n_batch, 1, 1] + ggml_tensor * kq_mask_cnv = nullptr; // [n_tokens, n_batch, 1, 1] const llama_hparams & hparams; const llama_cparams & 
cparams; @@ -237,24 +256,32 @@ class llm_graph_input_attn_kv_unified : public llm_graph_input_i { llm_graph_input_attn_kv_unified( const llama_hparams & hparams, const llama_cparams & cparams, - const llama_kv_cache_unified_state * kv_state) : + const llama_kv_cache_unified_context * mctx) : hparams(hparams), cparams(cparams), - kv_state(kv_state) { + mctx(mctx) { } ~llm_graph_input_attn_kv_unified() = default; void set_input(const llama_ubatch * ubatch) override; + bool can_reuse(const llm_graph_params & params) override; + + ggml_tensor * get_k_idxs() const { return self_k_idxs; } + ggml_tensor * get_v_idxs() const { return self_v_idxs; } + ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; } - ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch] - ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch] + ggml_tensor * self_k_idxs = nullptr; // I64 [n_batch] + ggml_tensor * self_v_idxs = nullptr; // I64 [n_batch] or [n_batch*n_embd_v_gqa] + + ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch/n_stream, 1, n_stream] + ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch/n_stream, 1, n_stream] const llama_hparams & hparams; const llama_cparams & cparams; - const llama_kv_cache_unified_state * kv_state; + const llama_kv_cache_unified_context * mctx; }; class llm_graph_input_attn_kv_unified_iswa : public llm_graph_input_i { @@ -262,27 +289,39 @@ class llm_graph_input_attn_kv_unified_iswa : public llm_graph_input_i { llm_graph_input_attn_kv_unified_iswa( const llama_hparams & hparams, const llama_cparams & cparams, - const llama_kv_cache_unified_iswa_state * kv_state) : + const llama_kv_cache_unified_iswa_context * mctx) : hparams(hparams), cparams(cparams), - kv_state(kv_state) { + mctx(mctx) { } ~llm_graph_input_attn_kv_unified_iswa() = default; void set_input(const llama_ubatch * ubatch) override; + bool can_reuse(const llm_graph_params & params) override; + + ggml_tensor * get_k_idxs() const { return self_k_idxs; } + 
ggml_tensor * get_v_idxs() const { return self_v_idxs; } + ggml_tensor * get_k_idxs_swa() const { return self_k_idxs_swa; } + ggml_tensor * get_v_idxs_swa() const { return self_v_idxs_swa; } + ggml_tensor * get_kq_mask() const { return self_kq_mask_cnv; } ggml_tensor * get_kq_mask_swa() const { return self_kq_mask_swa_cnv; } - ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch] - ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch] - ggml_tensor * self_kq_mask_swa = nullptr; // F32 [n_kv, n_batch] - ggml_tensor * self_kq_mask_swa_cnv = nullptr; // [n_kv, n_batch] + ggml_tensor * self_k_idxs = nullptr; // I64 [n_batch] + ggml_tensor * self_v_idxs = nullptr; // I64 [n_batch] or [n_batch*n_embd_v_gqa] + ggml_tensor * self_k_idxs_swa = nullptr; // I64 [n_batch] + ggml_tensor * self_v_idxs_swa = nullptr; // I64 [n_batch] or [n_batch*n_embd_v_gqa] + + ggml_tensor * self_kq_mask = nullptr; // F32 [n_kv, n_batch/n_stream, 1, n_stream] + ggml_tensor * self_kq_mask_cnv = nullptr; // [n_kv, n_batch/n_stream, 1, n_stream] + ggml_tensor * self_kq_mask_swa = nullptr; // F32 [n_kv, n_batch/n_stream, 1, n_stream] + ggml_tensor * self_kq_mask_swa_cnv = nullptr; // [n_kv, n_batch/n_stream, 1, n_stream] const llama_hparams & hparams; const llama_cparams & cparams; - const llama_kv_cache_unified_iswa_state * kv_state; + const llama_kv_cache_unified_iswa_context * mctx; }; class llm_graph_input_attn_cross : public llm_graph_input_i { @@ -294,12 +333,34 @@ class llm_graph_input_attn_cross : public llm_graph_input_i { ggml_tensor * get_kq_mask_cross() const { return cross_kq_mask_cnv; } - ggml_tensor * cross_kq_mask = nullptr; // F32 [n_outputs_enc, n_batch] - ggml_tensor * cross_kq_mask_cnv = nullptr; // F32 [n_outputs_enc, n_batch] + ggml_tensor * cross_kq_mask = nullptr; // F32 [n_outputs_enc, n_batch, 1, 1] + ggml_tensor * cross_kq_mask_cnv = nullptr; // F32 [n_outputs_enc, n_batch, 1, 1] const llama_cross * cross = nullptr; }; +class llm_graph_input_mem_hybrid : 
public llm_graph_input_i { +public: + llm_graph_input_mem_hybrid( + std::unique_ptr inp_attn, + std::unique_ptr inp_rs, + const llama_memory_hybrid_context * mctx) : + inp_attn(std::move(inp_attn)), + inp_rs(std::move(inp_rs)), + mctx(mctx) { } + virtual ~llm_graph_input_mem_hybrid() = default; + + void set_input(const llama_ubatch * ubatch) override; + + std::unique_ptr inp_attn; + std::unique_ptr inp_rs; + + llm_graph_input_attn_kv_unified * get_attn() const { return inp_attn.get(); } + llm_graph_input_rs * get_recr() const { return inp_rs.get(); } + + const llama_memory_hybrid_context * mctx; +}; + // // llm_graph_result // @@ -310,40 +371,127 @@ class llm_graph_input_attn_cross : public llm_graph_input_i { // along with the input tensors, the object also provides commonly used outputs tensors, such as logits, embeddings, etc. // these are used by the llama_context to extact the relevant data, based on the compute parameters +// TODO: this interface seems redundant - remove it class llm_graph_result_i { public: virtual ~llm_graph_result_i() = default; - virtual ggml_tensor * get_tokens() = 0; - virtual ggml_tensor * get_logits() = 0; - virtual ggml_tensor * get_embd() = 0; - virtual ggml_tensor * get_embd_pooled() = 0; + virtual ggml_tensor * get_tokens() const = 0; + virtual ggml_tensor * get_logits() const = 0; + virtual ggml_tensor * get_embd() const = 0; + virtual ggml_tensor * get_embd_pooled() const = 0; + + virtual ggml_cgraph * get_gf() = 0; + virtual ggml_context * get_ctx() = 0; + + virtual void reset() = 0; virtual void set_inputs(const llama_ubatch * ubatch) = 0; + + virtual bool can_reuse(const llm_graph_params & params) = 0; }; using llm_graph_result_ptr = std::unique_ptr; +// callback that allows us to apply custom logic to each tensor (e.g. ggml-alloc, offloading, etc.) 
+using llm_graph_cb = std::function; + +struct llm_graph_params { + llm_arch arch = LLM_ARCH_UNKNOWN; + + llama_hparams hparams; + llama_cparams cparams; + + llama_ubatch ubatch; // note: intentionally make a copy + + llm_graph_type gtype; + + ggml_backend_sched_t sched; + ggml_backend_t backend_cpu; + + const llama_adapter_cvec * cvec; + const llama_adapter_loras * loras; + const llama_memory_context_i * mctx; + const llama_cross * cross; + + uint32_t n_outputs; + + llm_graph_cb cb; + + // TODO: temporary + llm_graph_result_i * res; + + // return true if the "other" params would result in a graph with the same topology as with the current params + // having the same topology allows us to reuse the graph in some cases + bool allow_reuse(const llm_graph_params & other) const { + // first check the ubatch + bool can_reuse_ubatch = + ubatch.equal_seqs() == other.ubatch.equal_seqs() && + ubatch.n_tokens == other.ubatch.n_tokens && + ubatch.n_seq_tokens == other.ubatch.n_seq_tokens && + ubatch.n_seqs == other.ubatch.n_seqs && + ubatch.n_seqs_unq == other.ubatch.n_seqs_unq && + ( + (!ubatch.token && !other.ubatch.token) || + (!ubatch.embd && !other.ubatch.embd) + ); + + if (can_reuse_ubatch && !ubatch.equal_seqs()) { + if (!ubatch.data) { + // if the old ubatch does not own it's data, then we cannot guarantee that it is still alive, and + // therefore we cannot perform the sequence id check. 
normally should never happen + can_reuse_ubatch = false; + } else { + for (uint32_t s = 0; s < ubatch.n_seqs_unq; ++s) { + can_reuse_ubatch &= ubatch.seq_id_unq[s] == other.ubatch.seq_id_unq[s]; + } + } + } + + if (!can_reuse_ubatch) { + return false; + } + + return + cparams.embeddings == other.cparams.embeddings && + cparams.causal_attn == other.cparams.causal_attn && + arch == other.arch && + gtype == other.gtype && + cvec == other.cvec && + loras == other.loras && + cross == other.cross && + n_outputs == other.n_outputs; + } +}; class llm_graph_result : public llm_graph_result_i { public: + llm_graph_result(int64_t max_nodes); + virtual ~llm_graph_result() = default; - ggml_tensor * get_tokens() override { return t_tokens; } - ggml_tensor * get_logits() override { return t_logits; } - ggml_tensor * get_embd() override { return t_embd; } - ggml_tensor * get_embd_pooled() override { return t_embd_pooled; } + ggml_tensor * get_tokens() const override { return t_tokens; } + ggml_tensor * get_logits() const override { return t_logits; } + ggml_tensor * get_embd() const override { return t_embd; } + ggml_tensor * get_embd_pooled() const override { return t_embd_pooled; } - void set_inputs(const llama_ubatch * ubatch) override { - for (auto & input : inputs) { - input->set_input(ubatch); - } - } + ggml_cgraph * get_gf() override { return gf; } + ggml_context * get_ctx() override { return ctx_compute.get(); } - llm_graph_input_i * add_input(llm_graph_input_ptr input) { - inputs.emplace_back(std::move(input)); - return inputs.back().get(); - } + int64_t get_max_nodes() const; + + void reset() override; + + void set_inputs(const llama_ubatch * ubatch) override; + + // try to update the existing graph result using the new graph parameters in order to reuse it + // this can only be done if we determine that the resulting graph using the new graph parameters + // would be identical to the existing graph. 
in that case, we simply have to update the memory + // contexts of the input tensors of the graph and we can reuse it for another computation + // return true if the graph was updated and can be reused + bool can_reuse(const llm_graph_params & params) override; + + llm_graph_input_i * add_input(llm_graph_input_ptr input); // important graph nodes ggml_tensor * t_tokens = nullptr; @@ -352,36 +500,31 @@ class llm_graph_result : public llm_graph_result_i { ggml_tensor * t_embd_pooled = nullptr; std::vector inputs; -}; - -// -// llm_graph_context -// -// callback that allows us to apply custom logic to each tensor (e.g. ggml-alloc, offloading, etc.) -using llm_graph_cb = std::function; + ggml_context_ptr ctx_compute; -struct llm_graph_params { - ggml_context * ctx; + // memory buffers used to evaluate the model + std::vector buf_compute_meta; - const llm_arch arch; + ggml_cgraph * gf; - const llama_hparams & hparams; - const llama_cparams & cparams; - const llama_ubatch & ubatch; + int64_t max_nodes; - ggml_backend_sched_t sched; - ggml_backend_t backend_cpu; + // keep a copy of the previous graph parameters + // we will use this to determine whether the graph can be reused by comparing them with the new parameters + // note: these are updated after constructing the new graph + llm_graph_params params; - const llama_adapter_cvec * cvec; - const llama_adapter_loras * loras; - const llama_memory_state_i * mstate; - const llama_cross * cross; + // env: LLAMA_GRAPH_RESULT_DEBUG + int debug = 0; +}; - uint32_t n_outputs; +// +// llm_graph_context +// - const llm_graph_cb & cb; -}; +// used in build_rs to properly order writes and avoid unnecessary copies +using llm_graph_get_rows_fn = std::function; struct llm_graph_context { const llm_arch arch; @@ -419,24 +562,23 @@ struct llm_graph_context { const enum llama_pooling_type pooling_type; const enum llama_rope_type rope_type; - ggml_context * ctx0 = nullptr; - ggml_backend_sched_t sched; ggml_backend_t backend_cpu; // TODO: 
needed by build_attn_mha, figure out a way to remove? - const llama_adapter_cvec * cvec; - const llama_adapter_loras * loras; - const llama_memory_state_i * mstate; - const llama_cross * cross; + const llama_adapter_cvec * cvec; + const llama_adapter_loras * loras; + const llama_memory_context_i * mctx; + const llama_cross * cross; const llm_graph_cb & cb_func; - std::unique_ptr res; + llm_graph_result * res; - llm_graph_context(const llm_graph_params & params); + ggml_context * ctx0 = nullptr; - int64_t n_pos_per_embd() const; + llm_graph_context(const llm_graph_params & params); + virtual ~llm_graph_context() = default; void cb(ggml_tensor * cur, const char * name, int il) const; @@ -508,7 +650,6 @@ struct llm_graph_context { ggml_tensor * build_inp_out_ids() const; ggml_tensor * build_inp_mean() const; ggml_tensor * build_inp_cls() const; - ggml_tensor * build_inp_s_copy() const; ggml_tensor * build_inp_cross_embd() const; ggml_tensor * build_inp_pos_bucket_enc() const; @@ -561,14 +702,15 @@ struct llm_graph_context { llm_graph_input_attn_kv_unified_iswa * build_attn_inp_kv_unified_iswa() const; + // note: if k_cur or v_cur are not provided, they will not be stored in the memory ggml_tensor * build_attn( llm_graph_input_attn_kv_unified_iswa * inp, ggml_cgraph * gf, ggml_tensor * wo, ggml_tensor * wo_b, ggml_tensor * q_cur, // [n_embd_head_q, n_head_q, n_tokens] - ggml_tensor * k_cur, // [n_embd_head_k, n_head_k, n_tokens] - ggml_tensor * v_cur, // [n_embd_head_v, n_head_v, n_tokens] + ggml_tensor * k_cur, // [n_embd_head_k, n_head_k, n_tokens] optional + ggml_tensor * v_cur, // [n_embd_head_v, n_head_v, n_tokens] optional ggml_tensor * kq_b, ggml_tensor * v_mla, // [n_embd_head_v_mla, n_embd_head_v, n_head_v] float kq_scale, @@ -593,24 +735,49 @@ struct llm_graph_context { // recurrent // - ggml_tensor * build_recurrent_state( - ggml_cgraph * gf, - ggml_tensor * s, - ggml_tensor * state_copy, - int32_t state_size, - int32_t n_seqs, - bool avoid_copies = false) 
const; + // TODO: avoid notion of "kv" + // TODO: move this implementation to llama_memory_recurrent. + // this is analogous to llama_kv_cache_unified::cpy_k / cpy_v + // when moving, avoid passing `ggml_cgraph` - only pass `ggml_context`. would likely need to split the + // implementation in 2 separate methods. the goal is to avoid calling `ggml_build_forward_expand` in + // `llama_memory_recurrent` + ggml_tensor * build_rs( + ggml_cgraph * gf, + ggml_tensor * s, + ggml_tensor * state_copy, + int32_t state_size, + int32_t n_seqs, + uint32_t n_kv, + uint32_t kv_head, + uint32_t kv_size, + int32_t rs_zero, + const llm_graph_get_rows_fn & get_state_rows = ggml_get_rows) const; + + llm_graph_input_rs * build_rs_inp() const; + + ggml_tensor * build_rs( + llm_graph_input_rs * inp, + ggml_cgraph * gf, + ggml_tensor * s, + int32_t state_size, + int32_t n_seqs, + const llm_graph_get_rows_fn & get_state_rows = ggml_get_rows) const; ggml_tensor * build_rwkv_token_shift_load( - ggml_cgraph * gf, - ggml_tensor * state_copy, - const llama_ubatch & ubatch, + llm_graph_input_rs * inp, + ggml_cgraph * gf, + const llama_ubatch & ubatch, int il) const; ggml_tensor * build_rwkv_token_shift_store( ggml_tensor * token_shift, const llama_ubatch & ubatch, int il) const; + // + // hybrid + // + + llm_graph_input_mem_hybrid * build_inp_mem_hybrid() const; // // pooling diff --git a/src/llama-hparams.cpp b/src/llama-hparams.cpp index 1499eb08a5dd9..c6c67d26f9392 100644 --- a/src/llama-hparams.cpp +++ b/src/llama-hparams.cpp @@ -65,18 +65,64 @@ uint32_t llama_hparams::n_embd_v_gqa(uint32_t il) const { return n_embd_head_v * n_head_kv; } -uint32_t llama_hparams::n_embd_k_s() const { +bool llama_hparams::is_n_embd_k_gqa_variable() const { + const uint32_t val = n_embd_k_gqa(); + for (uint32_t il = 0; il < n_layer; ++il) { + if (val != n_embd_k_gqa(il)) { + return true; + } + } + + return false; +} + +bool llama_hparams::is_n_embd_v_gqa_variable() const { + const uint32_t val = n_embd_v_gqa(); 
+ for (uint32_t il = 0; il < n_layer; ++il) { + if (val != n_embd_v_gqa(il)) { + return true; + } + } + + return false; +} + +uint32_t llama_hparams::n_embd_k_gqa_max() const { + uint32_t val = n_embd_k_gqa(); + for (uint32_t il = 0; il < n_layer; ++il) { + val = std::max(val, n_embd_k_gqa(il)); + } + + return val; +} + +uint32_t llama_hparams::n_embd_v_gqa_max() const { + uint32_t val = n_embd_v_gqa(); + for (uint32_t il = 0; il < n_layer; ++il) { + val = std::max(val, n_embd_v_gqa(il)); + } + + return val; +} + +uint32_t llama_hparams::n_embd_r() const { if (wkv_head_size != 0) { // for RWKV models return token_shift_count * n_embd; } + if (n_shortconv_l_cache != 0) { + // for LFM2 models + return n_embd * (n_shortconv_l_cache - 1); + } + // TODO: maybe support other convolution strides than 1 // NOTE: since the first column of the conv_state is shifted out each time, it's not actually needed - return (ssm_d_conv > 0 ? ssm_d_conv - 1 : 0) * ssm_d_inner; + // Corresponds to Mamba's conv_states size + return (ssm_d_conv > 0 ? ssm_d_conv - 1 : 0) * (ssm_d_inner + 2*ssm_n_group*ssm_d_state); } -uint32_t llama_hparams::n_embd_v_s() const { +uint32_t llama_hparams::n_embd_s() const { if (wkv_head_size != 0) { // corresponds to RWKV's wkv_states size return n_embd * wkv_head_size; @@ -86,6 +132,14 @@ uint32_t llama_hparams::n_embd_v_s() const { return ssm_d_state * ssm_d_inner; } +bool llama_hparams::is_recurrent(uint32_t il) const { + return recurrent_layer_arr[il]; +} + +uint32_t llama_hparams::n_pos_per_embd() const { + return rope_type == LLAMA_ROPE_TYPE_MROPE ? 
4 : 1; +} + bool llama_hparams::is_swa(uint32_t il) const { if (il < n_layer) { return swa_layers[il]; diff --git a/src/llama-hparams.h b/src/llama-hparams.h index b2bcb8b01a18b..c422cd7be827a 100644 --- a/src/llama-hparams.h +++ b/src/llama-hparams.h @@ -6,7 +6,7 @@ // bump if necessary #define LLAMA_MAX_LAYERS 512 -#define LLAMA_MAX_EXPERTS 256 // DeepSeekV3 +#define LLAMA_MAX_EXPERTS 384 // Kimi-K2 enum llama_expert_gating_func_type { LLAMA_EXPERT_GATING_FUNC_TYPE_NONE = 0, @@ -55,6 +55,8 @@ struct llama_hparams { struct llama_hparams_posnet posnet; struct llama_hparams_convnext convnext; + uint32_t n_shortconv_l_cache = 0; + std::array n_head_arr; std::array n_head_kv_arr; std::array n_ff_arr; @@ -114,6 +116,10 @@ struct llama_hparams { uint32_t ssm_d_inner = 0; uint32_t ssm_d_state = 0; uint32_t ssm_dt_rank = 0; + uint32_t ssm_n_group = 0; + + // for hybrid state space models + std::array recurrent_layer_arr; bool ssm_dt_b_c_rms = false; @@ -140,6 +146,12 @@ struct llama_hparams { uint32_t n_attn_temp_floor_scale = 8192; float f_attn_temp_scale = 0.1; + // gemma3n altup + uint32_t n_altup = 4; // altup_num_inputs + uint32_t i_altup_act = 0; // altup_active_idx + uint32_t laurel_rank = 64; + uint32_t n_embd_altup = 256; + // needed by encoder-decoder models (e.g. 
T5, FLAN-T5) // ref: https://github.com/ggerganov/llama.cpp/pull/8141 llama_token dec_start_token_id = LLAMA_TOKEN_NULL; @@ -179,12 +191,25 @@ struct llama_hparams { // dimension of value embeddings across all k-v heads uint32_t n_embd_v_gqa(uint32_t il = 0) const; + // true if any layer has a different n_embd_k_gqa/n_embd_v_gqa + bool is_n_embd_k_gqa_variable() const; + bool is_n_embd_v_gqa_variable() const; + + // return the maximum n_embd_k_gqa/n_embd_v_gqa across all layers + uint32_t n_embd_k_gqa_max() const; + uint32_t n_embd_v_gqa_max() const; + // dimension of the rolling state embeddings // corresponds to Mamba's conv_states size or RWKV's token_shift states size - uint32_t n_embd_k_s() const; + uint32_t n_embd_r() const; // dimension of the recurrent state embeddings - uint32_t n_embd_v_s() const; + uint32_t n_embd_s() const; + + // whether or not the given layer is recurrent (for hybrid models) + bool is_recurrent(uint32_t il) const; + + uint32_t n_pos_per_embd() const; bool is_swa(uint32_t il) const; }; diff --git a/src/llama-kv-cache-unified-iswa.cpp b/src/llama-kv-cache-unified-iswa.cpp index 9814f76631203..01d27fb4db9b1 100644 --- a/src/llama-kv-cache-unified-iswa.cpp +++ b/src/llama-kv-cache-unified-iswa.cpp @@ -18,16 +18,17 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa( bool v_trans, bool offload, bool swa_full, + bool unified, uint32_t kv_size, uint32_t n_seq_max, uint32_t n_ubatch, - uint32_t n_pad) : hparams(model.hparams) { + uint32_t n_pad) : hparams(model.hparams), unified(unified) { llama_kv_cache_unified::layer_filter_cb filter_base = [&](int32_t il) { return !model.hparams.is_swa(il); }; llama_kv_cache_unified::layer_filter_cb filter_swa = [&](int32_t il) { return model.hparams.is_swa(il); }; const uint32_t size_base = kv_size; - uint32_t size_swa = std::min(size_base, GGML_PAD(hparams.n_swa*n_seq_max + n_ubatch, n_pad)); + uint32_t size_swa = std::min(size_base, GGML_PAD(hparams.n_swa*(unified ? 
n_seq_max : 1) + n_ubatch, n_pad)); // when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size if (swa_full) { @@ -41,14 +42,14 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa( kv_base = std::make_unique( model, std::move(filter_base), type_k, type_v, - v_trans, offload, size_base, n_seq_max, n_pad, + v_trans, offload, unified, size_base, n_seq_max, n_pad, 0, LLAMA_SWA_TYPE_NONE); LLAMA_LOG_INFO("%s: creating SWA KV cache, size = %u cells\n", __func__, size_swa); kv_swa = std::make_unique( model, std::move(filter_swa), type_k, type_v, - v_trans, offload, size_swa, n_seq_max, n_pad, + v_trans, offload, unified, size_swa, n_seq_max, n_pad, hparams.n_swa, hparams.swa_type); } @@ -95,77 +96,98 @@ llama_pos llama_kv_cache_unified_iswa::seq_pos_max(llama_seq_id seq_id) const { return kv_swa->seq_pos_max(seq_id); } -llama_memory_state_ptr llama_kv_cache_unified_iswa::init_batch(const llama_batch & batch, uint32_t n_ubatch, bool embd_pooled) { - GGML_UNUSED(embd_pooled); +llama_memory_context_ptr llama_kv_cache_unified_iswa::init_batch(llama_batch_allocr & balloc, uint32_t n_ubatch, bool embd_all) { + GGML_UNUSED(embd_all); // first try simple split do { - auto sbatch = llama_sbatch(batch, hparams.n_embd, true); + if (!unified) { + // requires equal splits, so we skip the simple split + break; + } + + balloc.split_reset(); std::vector ubatches; + while (true) { + auto ubatch = balloc.split_simple(n_ubatch); - while (sbatch.n_tokens > 0) { - auto ubatch = sbatch.split_simple(n_ubatch); + if (ubatch.n_tokens == 0) { + break; + } - ubatches.push_back(ubatch); + ubatches.push_back(std::move(ubatch)); // NOLINT } - auto heads_base = kv_base->prepare(ubatches); - if (heads_base.empty()) { + if (balloc.get_n_used() < balloc.get_n_tokens()) { + // failed to find a suitable split break; } - auto heads_swa = kv_swa->prepare(ubatches); - if (heads_swa.empty()) { + auto sinfos_base = kv_base->prepare(ubatches); + if 
(sinfos_base.empty()) { break; } - assert(heads_base.size() == heads_swa.size()); + auto sinfos_swa = kv_swa->prepare(ubatches); + if (sinfos_swa.empty()) { + break; + } - return std::make_unique( - this, std::move(sbatch), std::move(heads_base), std::move(heads_swa), std::move(ubatches)); + assert(sinfos_base.size() == sinfos_swa.size()); + + return std::make_unique( + this, std::move(sinfos_base), std::move(sinfos_swa), std::move(ubatches)); } while (false); // if it fails, try equal split do { - auto sbatch = llama_sbatch(batch, hparams.n_embd, false); + balloc.split_reset(); std::vector ubatches; + while (true) { + auto ubatch = balloc.split_equal(n_ubatch, !unified); + + if (ubatch.n_tokens == 0) { + break; + } - while (sbatch.n_tokens > 0) { - auto ubatch = sbatch.split_equal(n_ubatch); + ubatches.push_back(std::move(ubatch)); // NOLINT + } - ubatches.push_back(ubatch); + if (balloc.get_n_used() < balloc.get_n_tokens()) { + // failed to find a suitable split + break; } - auto heads_base = kv_base->prepare(ubatches); - if (heads_base.empty()) { + auto sinfos_base = kv_base->prepare(ubatches); + if (sinfos_base.empty()) { break; } - auto heads_swa = kv_swa->prepare(ubatches); - if (heads_swa.empty()) { + auto sinfos_swa = kv_swa->prepare(ubatches); + if (sinfos_swa.empty()) { break; } - assert(heads_base.size() == heads_swa.size()); + assert(sinfos_base.size() == sinfos_swa.size()); - return std::make_unique( - this, std::move(sbatch), std::move(heads_base), std::move(heads_swa), std::move(ubatches)); + return std::make_unique( + this, std::move(sinfos_base), std::move(sinfos_swa), std::move(ubatches)); } while (false); // TODO: if we fail again, we should attempt different splitting strategies // but to do that properly, we first have to refactor the batches to be more flexible - return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); + return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); } -llama_memory_state_ptr 
llama_kv_cache_unified_iswa::init_full() { - return std::make_unique(this); +llama_memory_context_ptr llama_kv_cache_unified_iswa::init_full() { + return std::make_unique(this); } -llama_memory_state_ptr llama_kv_cache_unified_iswa::init_update(llama_context * lctx, bool optimize) { - return std::make_unique(this, lctx, optimize); +llama_memory_context_ptr llama_kv_cache_unified_iswa::init_update(llama_context * lctx, bool optimize) { + return std::make_unique(this, lctx, optimize); } bool llama_kv_cache_unified_iswa::get_can_shift() const { @@ -191,52 +213,46 @@ llama_kv_cache_unified * llama_kv_cache_unified_iswa::get_swa() const { } // -// llama_kv_cache_unified_iswa_state +// llama_kv_cache_unified_iswa_context // -llama_kv_cache_unified_iswa_state::llama_kv_cache_unified_iswa_state(llama_memory_status status) : status(status) {} +llama_kv_cache_unified_iswa_context::llama_kv_cache_unified_iswa_context(llama_memory_status status) : status(status) {} -llama_kv_cache_unified_iswa_state::llama_kv_cache_unified_iswa_state( - llama_kv_cache_unified_iswa * kv) : status(LLAMA_MEMORY_STATUS_SUCCESS) { - state_base = kv->get_base()->init_full(); - state_swa = kv->get_swa ()->init_full(); - - status = llama_memory_status_combine(state_base->get_status(), state_swa->get_status()); +llama_kv_cache_unified_iswa_context::llama_kv_cache_unified_iswa_context( + llama_kv_cache_unified_iswa * kv) : + ctx_base(kv->get_base()->init_full()), + ctx_swa (kv->get_swa ()->init_full()), + status(llama_memory_status_combine(ctx_base->get_status(), ctx_swa->get_status())) { } -llama_kv_cache_unified_iswa_state::llama_kv_cache_unified_iswa_state( +llama_kv_cache_unified_iswa_context::llama_kv_cache_unified_iswa_context( llama_kv_cache_unified_iswa * kv, llama_context * lctx, - bool optimize) : status(LLAMA_MEMORY_STATUS_SUCCESS) { - state_base = kv->get_base()->init_update(lctx, optimize); - state_swa = kv->get_swa ()->init_update(lctx, optimize); - - status = 
llama_memory_status_combine(state_base->get_status(), state_swa->get_status()); + bool optimize) : + ctx_base(kv->get_base()->init_update(lctx, optimize)), + ctx_swa (kv->get_swa ()->init_update(lctx, optimize)), + status(llama_memory_status_combine(ctx_base->get_status(), ctx_swa->get_status())) { } -llama_kv_cache_unified_iswa_state::llama_kv_cache_unified_iswa_state( +llama_kv_cache_unified_iswa_context::llama_kv_cache_unified_iswa_context( llama_kv_cache_unified_iswa * kv, - llama_sbatch sbatch, - std::vector heads_base, - std::vector heads_swa, - std::vector ubatches) - : status(LLAMA_MEMORY_STATUS_SUCCESS), - sbatch(std::move(sbatch)), - ubatches(std::move(ubatches)) { + slot_info_vec_t sinfos_base, + slot_info_vec_t sinfos_swa, + std::vector ubatches) : + ubatches(std::move(ubatches)), // note: here we copy the ubatches. not sure if this is ideal - state_base.reset(new llama_kv_cache_unified_state(kv->get_base(), {}, std::move(heads_base), this->ubatches)); - state_swa .reset(new llama_kv_cache_unified_state(kv->get_swa (), {}, std::move(heads_swa), this->ubatches)); - - status = llama_memory_status_combine(state_base->get_status(), state_swa->get_status()); + ctx_base(new llama_kv_cache_unified_context(kv->get_base(), std::move(sinfos_base), this->ubatches)), + ctx_swa (new llama_kv_cache_unified_context(kv->get_swa (), std::move(sinfos_swa), this->ubatches)), + status(llama_memory_status_combine(ctx_base->get_status(), ctx_swa->get_status())) { } -llama_kv_cache_unified_iswa_state:: ~llama_kv_cache_unified_iswa_state() = default; +llama_kv_cache_unified_iswa_context:: ~llama_kv_cache_unified_iswa_context() = default; -bool llama_kv_cache_unified_iswa_state::next() { +bool llama_kv_cache_unified_iswa_context::next() { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); - state_base->next(); - state_swa ->next(); + ctx_base->next(); + ctx_swa ->next(); if (++i_next >= ubatches.size()) { return false; @@ -245,41 +261,35 @@ bool 
llama_kv_cache_unified_iswa_state::next() { return true; } -bool llama_kv_cache_unified_iswa_state::apply() { - assert(status == LLAMA_MEMORY_STATUS_SUCCESS); +bool llama_kv_cache_unified_iswa_context::apply() { + assert(!llama_memory_status_is_fail(status)); bool res = true; - res = res & state_base->apply(); - res = res & state_swa ->apply(); + res = res & ctx_base->apply(); + res = res & ctx_swa ->apply(); return res; } -std::vector & llama_kv_cache_unified_iswa_state::out_ids() { - assert(status == LLAMA_MEMORY_STATUS_SUCCESS); - - return sbatch.out_ids; -} - -llama_memory_status llama_kv_cache_unified_iswa_state::get_status() const { +llama_memory_status llama_kv_cache_unified_iswa_context::get_status() const { return status; } -const llama_ubatch & llama_kv_cache_unified_iswa_state::get_ubatch() const { +const llama_ubatch & llama_kv_cache_unified_iswa_context::get_ubatch() const { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); return ubatches[i_next]; } -const llama_kv_cache_unified_state * llama_kv_cache_unified_iswa_state::get_base() const { +const llama_kv_cache_unified_context * llama_kv_cache_unified_iswa_context::get_base() const { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); - return static_cast(state_base.get()); + return static_cast(ctx_base.get()); } -const llama_kv_cache_unified_state * llama_kv_cache_unified_iswa_state::get_swa() const { +const llama_kv_cache_unified_context * llama_kv_cache_unified_iswa_context::get_swa() const { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); - return static_cast(state_swa.get()); + return static_cast(ctx_swa.get()); } diff --git a/src/llama-kv-cache-unified-iswa.h b/src/llama-kv-cache-unified-iswa.h index d114c7378fbe9..d2650dadd3595 100644 --- a/src/llama-kv-cache-unified-iswa.h +++ b/src/llama-kv-cache-unified-iswa.h @@ -20,6 +20,7 @@ class llama_kv_cache_unified_iswa : public llama_memory_i { bool v_trans, bool offload, bool swa_full, + bool unified, uint32_t kv_size, uint32_t n_seq_max, uint32_t n_ubatch, 
@@ -31,14 +32,14 @@ class llama_kv_cache_unified_iswa : public llama_memory_i { // llama_memory_i // - llama_memory_state_ptr init_batch( - const llama_batch & batch, + llama_memory_context_ptr init_batch( + llama_batch_allocr & balloc, uint32_t n_ubatch, - bool embd_pooled) override; + bool embd_all) override; - llama_memory_state_ptr init_full() override; + llama_memory_context_ptr init_full() override; - llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) override; + llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) override; bool get_can_shift() const override; @@ -68,66 +69,65 @@ class llama_kv_cache_unified_iswa : public llama_memory_i { private: const llama_hparams & hparams; + const bool unified; + std::unique_ptr kv_base; std::unique_ptr kv_swa; }; -class llama_kv_cache_unified_iswa_state : public llama_memory_state_i { +class llama_kv_cache_unified_iswa_context : public llama_memory_context_i { public: + using slot_info_vec_t = llama_kv_cache_unified::slot_info_vec_t; + // used for errors - llama_kv_cache_unified_iswa_state(llama_memory_status status); + llama_kv_cache_unified_iswa_context(llama_memory_status status); - // used to create a full-cache state - llama_kv_cache_unified_iswa_state( + // used to create a full-cache context + llama_kv_cache_unified_iswa_context( llama_kv_cache_unified_iswa * kv); - // used to create an update state - llama_kv_cache_unified_iswa_state( + // used to create an update context + llama_kv_cache_unified_iswa_context( llama_kv_cache_unified_iswa * kv, llama_context * lctx, bool optimize); - // used to create a state from a batch - llama_kv_cache_unified_iswa_state( + // used to create a batch processing context from a batch + llama_kv_cache_unified_iswa_context( llama_kv_cache_unified_iswa * kv, - llama_sbatch sbatch, - std::vector heads_base, - std::vector heads_swa, + slot_info_vec_t sinfos_base, + slot_info_vec_t sinfos_swa, std::vector ubatches); - virtual 
~llama_kv_cache_unified_iswa_state(); + virtual ~llama_kv_cache_unified_iswa_context(); // - // llama_memory_state_i + // llama_memory_context_i // bool next() override; bool apply() override; - std::vector & out_ids() override; - llama_memory_status get_status() const override; const llama_ubatch & get_ubatch() const override; // - // llama_kv_cache_unified_iswa_state specific API + // llama_kv_cache_unified_iswa_context specific API // - const llama_kv_cache_unified_state * get_base() const; - const llama_kv_cache_unified_state * get_swa() const; + const llama_kv_cache_unified_context * get_base() const; + const llama_kv_cache_unified_context * get_swa() const; private: - llama_memory_status status; - //llama_kv_cache_unified_iswa * kv; - llama_sbatch sbatch; - // the index of the next ubatch to process size_t i_next = 0; std::vector ubatches; - llama_memory_state_ptr state_base; - llama_memory_state_ptr state_swa; + const llama_memory_context_ptr ctx_base; + const llama_memory_context_ptr ctx_swa; + + const llama_memory_status status; }; diff --git a/src/llama-kv-cache-unified.cpp b/src/llama-kv-cache-unified.cpp index d4e92eab3a179..321dc79fc36ab 100644 --- a/src/llama-kv-cache-unified.cpp +++ b/src/llama-kv-cache-unified.cpp @@ -23,23 +23,30 @@ llama_kv_cache_unified::llama_kv_cache_unified( ggml_type type_v, bool v_trans, bool offload, + bool unified, uint32_t kv_size, uint32_t n_seq_max, uint32_t n_pad, uint32_t n_swa, llama_swa_type swa_type) : model(model), hparams(model.hparams), v_trans(v_trans), - n_seq_max(n_seq_max), n_pad(n_pad), n_swa(n_swa), swa_type(swa_type) { + n_seq_max(n_seq_max), n_stream(unified ? 
1 : n_seq_max), n_pad(n_pad), n_swa(n_swa), swa_type(swa_type) { GGML_ASSERT(kv_size % n_pad == 0); + // TODO: this is temporary until we support passing reuse layer filters [KV_REUSE] + auto n_layer_cache = hparams.n_layer; + if (model.arch == LLM_ARCH_GEMMA3N) { + n_layer_cache = 20; + } + // create a context for each buffer type std::map ctx_map; auto ctx_for_buft = [&](ggml_backend_buffer_type_t buft) -> ggml_context * { auto it = ctx_map.find(buft); if (it == ctx_map.end()) { ggml_init_params params = { - /*.mem_size =*/ size_t(2u*hparams.n_layer*ggml_tensor_overhead()), + /*.mem_size =*/ size_t(2u*(1 + n_stream)*n_layer_cache*ggml_tensor_overhead()), /*.mem_buffer =*/ NULL, /*.no_alloc =*/ true, }; @@ -58,18 +65,43 @@ llama_kv_cache_unified::llama_kv_cache_unified( return it->second; }; - head = 0; + GGML_ASSERT(n_stream == 1 || n_stream == n_seq_max); + + v_heads.resize(n_stream); + for (uint32_t s = 0; s < n_stream; ++s) { + v_heads[s] = 0; + } + + v_cells.resize(n_stream); + for (uint32_t s = 0; s < n_stream; ++s) { + v_cells[s].resize(kv_size); + } + + // by default, all sequence ids are mapped to the 0th stream + seq_to_stream.resize(LLAMA_MAX_SEQ, 0); + + if (n_stream > 1) { + seq_to_stream.resize(n_stream, 0); + for (uint32_t s = 0; s < n_stream; ++s) { + seq_to_stream[s] = s; + } + } - cells.resize(kv_size); + // [TAG_V_CACHE_VARIABLE] + if (v_trans && hparams.is_n_embd_v_gqa_variable()) { + LLAMA_LOG_WARN("%s: the V embeddings have different sizes across layers and FA is not enabled - padding V cache to %d\n", + __func__, hparams.n_embd_v_gqa_max()); + } - for (uint32_t il = 0; il < hparams.n_layer; il++) { + for (uint32_t il = 0; il < n_layer_cache; il++) { if (filter && !filter(il)) { LLAMA_LOG_DEBUG("%s: layer %3d: skipped\n", __func__, il); continue; } - const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(); - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); + // [TAG_V_CACHE_VARIABLE] + const 
uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il); + const uint32_t n_embd_v_gqa = !v_trans ? hparams.n_embd_v_gqa(il) : hparams.n_embd_v_gqa_max(); const char * dev_name = "CPU"; @@ -92,14 +124,43 @@ llama_kv_cache_unified::llama_kv_cache_unified( ggml_tensor * k; ggml_tensor * v; - k = ggml_new_tensor_2d(ctx, type_k, n_embd_k_gqa, kv_size); - v = ggml_new_tensor_2d(ctx, type_v, n_embd_v_gqa, kv_size); + k = ggml_new_tensor_3d(ctx, type_k, n_embd_k_gqa, kv_size, n_stream); + v = ggml_new_tensor_3d(ctx, type_v, n_embd_v_gqa, kv_size, n_stream); ggml_format_name(k, "cache_k_l%d", il); ggml_format_name(v, "cache_v_l%d", il); + std::vector k_stream; + std::vector v_stream; + + for (uint32_t s = 0; s < n_stream; ++s) { + k_stream.push_back(ggml_view_2d(ctx, k, n_embd_k_gqa, kv_size, k->nb[1], s*k->nb[2])); + v_stream.push_back(ggml_view_2d(ctx, v, n_embd_v_gqa, kv_size, v->nb[1], s*v->nb[2])); + } + map_layer_ids[il] = layers.size(); - layers.push_back({ il, k, v }); + + layers.push_back({ il, k, v, k_stream, v_stream, }); + } + + // TODO: this is temporary until we support passing reuse layer filters [KV_REUSE] + if (model.arch == LLM_ARCH_GEMMA3N) { + LLAMA_LOG_DEBUG("%s: GEMMA3N: reuse layers [%d, %d]\n", __func__, n_layer_cache, hparams.n_layer - 1); + + for (uint32_t il = n_layer_cache; il < hparams.n_layer; il++) { + if (filter && !filter(il)) { + LLAMA_LOG_DEBUG("%s: layer %3d: skipped\n", __func__, il); + continue; + } + + const bool is_swa = hparams.is_swa(il); + const uint32_t il_reuse = n_layer_cache - (is_swa ? 
2 : 1); + + GGML_ASSERT(map_layer_ids.find(il_reuse) != map_layer_ids.end()); + map_layer_ids[il] = map_layer_ids[il_reuse]; + + LLAMA_LOG_DEBUG("%s: layer %3d: reuse layer %d, isw = %d\n", __func__, il, il_reuse, is_swa); + } } // allocate tensors and initialize the buffers to avoid NaNs in the padding @@ -122,20 +183,33 @@ llama_kv_cache_unified::llama_kv_cache_unified( const size_t memory_size_k = size_k_bytes(); const size_t memory_size_v = size_v_bytes(); - LLAMA_LOG_INFO("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u seqs), K (%s): %7.2f MiB, V (%s): %7.2f MiB\n", __func__, - (float)(memory_size_k + memory_size_v) / (1024.0f * 1024.0f), kv_size, (int) layers.size(), n_seq_max, + LLAMA_LOG_INFO("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u/%2u seqs), K (%s): %7.2f MiB, V (%s): %7.2f MiB\n", __func__, + (float)(memory_size_k + memory_size_v) / (1024.0f * 1024.0f), kv_size, (int) layers.size(), n_seq_max, n_stream, ggml_type_name(type_k), (float)memory_size_k / (1024.0f * 1024.0f), ggml_type_name(type_v), (float)memory_size_v / (1024.0f * 1024.0f)); } const char * LLAMA_KV_CACHE_DEBUG = getenv("LLAMA_KV_CACHE_DEBUG"); debug = LLAMA_KV_CACHE_DEBUG ? atoi(LLAMA_KV_CACHE_DEBUG) : 0; + + const char * LLAMA_SET_ROWS = getenv("LLAMA_SET_ROWS"); + supports_set_rows = LLAMA_SET_ROWS ? 
atoi(LLAMA_SET_ROWS) != 0 : 0; + + if (!supports_set_rows) { + // ref: https://github.com/ggml-org/llama.cpp/pull/14363 + GGML_ASSERT(unified && "cannot use non-unified KV cache without ggml_set_rows() support"); + } + + if (!supports_set_rows) { + LLAMA_LOG_WARN("%s: LLAMA_SET_ROWS=0, using old ggml_cpy() method for backwards compatibility\n", __func__); + } } void llama_kv_cache_unified::clear(bool data) { - cells.reset(); - - head = 0; + for (uint32_t s = 0; s < n_stream; ++s) { + v_cells[s].reset(); + v_heads[s] = 0; + } if (data) { for (auto & buf : bufs) { @@ -145,6 +219,11 @@ void llama_kv_cache_unified::clear(bool data) { } bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) { + GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()); + + auto & cells = v_cells[seq_to_stream[seq_id]]; + auto & head = v_heads[seq_to_stream[seq_id]]; + uint32_t new_head = cells.size(); if (p0 < 0) { @@ -191,30 +270,94 @@ bool llama_kv_cache_unified::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos } void llama_kv_cache_unified::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) { - if (seq_id_src == seq_id_dst) { + GGML_ASSERT(seq_id_src >= 0 && (size_t) seq_id_src < seq_to_stream.size()); + GGML_ASSERT(seq_id_dst >= 0 && (size_t) seq_id_dst < seq_to_stream.size()); + + const auto s0 = seq_to_stream[seq_id_src]; + const auto s1 = seq_to_stream[seq_id_dst]; + + if (s0 == s1) { + // since both sequences are in the same stream, no data copy is necessary + // we just have to update the cells meta data + + auto & cells = v_cells[s0]; + + if (seq_id_src == seq_id_dst) { + return; + } + + if (p0 < 0) { + p0 = 0; + } + + if (p1 < 0) { + p1 = std::numeric_limits::max(); + } + + for (uint32_t i = 0; i < cells.size(); ++i) { + if (!cells.pos_in(i, p0, p1)) { + continue; + } + + if (cells.seq_has(i, seq_id_src)) { + cells.seq_add(i, seq_id_dst); + } + } + return; } - if (p0 < 0) { - p0 = 0; + // cross-stream 
sequence copies require to copy the actual buffer data + + bool is_full = true; + + if (p0 > 0 && p0 + 1 < (int) get_size()) { + is_full = false; } - if (p1 < 0) { - p1 = std::numeric_limits::max(); + if (p1 > 0 && p1 + 1 < (int) get_size()) { + is_full = false; } - for (uint32_t i = 0; i < cells.size(); ++i) { - if (!cells.pos_in(i, p0, p1)) { - continue; - } + GGML_ASSERT(is_full && "seq_cp() is only supported for full KV buffers"); - if (cells.seq_has(i, seq_id_src)) { - cells.seq_add(i, seq_id_dst); + // enqueue the copy operation - the buffer copy will be performed during the next update + sc_info.ssrc.push_back(s0); + sc_info.sdst.push_back(s1); + + v_cells[s1].reset(); + for (uint32_t i = 0; i < v_cells[s0].size(); ++i) { + if (v_cells[s0].seq_has(i, seq_id_src)) { + llama_pos pos = v_cells[s0].pos_get(i); + llama_pos shift = v_cells[s0].get_shift(i); + + if (shift != 0) { + pos -= shift; + assert(pos >= 0); + } + + v_cells[s1].pos_set(i, pos); + v_cells[s1].seq_add(i, seq_id_dst); + + if (shift != 0) { + v_cells[s1].pos_add(i, shift); + } } } + + v_heads[s1] = v_heads[s0]; + + //for (uint32_t s = 0; s < n_stream; ++s) { + // LLAMA_LOG_WARN("%s: seq %d: min = %d, max = %d\n", __func__, s, v_cells[s].seq_pos_min(s), v_cells[s].seq_pos_max(s)); + //} } void llama_kv_cache_unified::seq_keep(llama_seq_id seq_id) { + GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()); + + auto & cells = v_cells[seq_to_stream[seq_id]]; + auto & head = v_heads[seq_to_stream[seq_id]]; + uint32_t new_head = cells.size(); for (uint32_t i = 0; i < cells.size(); ++i) { @@ -232,6 +375,11 @@ void llama_kv_cache_unified::seq_keep(llama_seq_id seq_id) { } void llama_kv_cache_unified::seq_add(llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) { + GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()); + + auto & cells = v_cells[seq_to_stream[seq_id]]; + auto & head = v_heads[seq_to_stream[seq_id]]; + if (shift == 0) { return; } @@ -271,6 +419,10 @@ 
void llama_kv_cache_unified::seq_add(llama_seq_id seq_id, llama_pos p0, llama_po } void llama_kv_cache_unified::seq_div(llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) { + GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()); + + auto & cells = v_cells[seq_to_stream[seq_id]]; + if (d == 1) { return; } @@ -300,50 +452,72 @@ void llama_kv_cache_unified::seq_div(llama_seq_id seq_id, llama_pos p0, llama_po } llama_pos llama_kv_cache_unified::seq_pos_min(llama_seq_id seq_id) const { + GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()); + + const auto & cells = v_cells[seq_to_stream[seq_id]]; + return cells.seq_pos_min(seq_id); } llama_pos llama_kv_cache_unified::seq_pos_max(llama_seq_id seq_id) const { + GGML_ASSERT(seq_id >= 0 && (size_t) seq_id < seq_to_stream.size()); + + const auto & cells = v_cells[seq_to_stream[seq_id]]; + return cells.seq_pos_max(seq_id); } -llama_memory_state_ptr llama_kv_cache_unified::init_batch( - const llama_batch & batch, +llama_memory_context_ptr llama_kv_cache_unified::init_batch( + llama_batch_allocr & balloc, uint32_t n_ubatch, - bool embd_pooled) { - GGML_UNUSED(embd_pooled); + bool embd_all) { + GGML_UNUSED(embd_all); do { - auto sbatch = llama_sbatch(batch, hparams.n_embd, true); + balloc.split_reset(); std::vector ubatches; - while (sbatch.n_tokens > 0) { - ubatches.push_back(sbatch.split_simple(n_ubatch)); + while (true) { + auto ubatch = n_stream == 1 ? 
balloc.split_simple(n_ubatch) : balloc.split_equal(n_ubatch, true); + + if (ubatch.n_tokens == 0) { + break; + } + + ubatches.push_back(std::move(ubatch)); // NOLINT + } + + if (balloc.get_n_used() < balloc.get_n_tokens()) { + // failed to find a suitable split + break; } - auto heads = prepare(ubatches); - if (heads.empty()) { + auto sinfos = prepare(ubatches); + if (sinfos.empty()) { break; } - return std::make_unique( - this, std::move(sbatch), std::move(heads), std::move(ubatches)); + return std::make_unique( + this, std::move(sinfos), std::move(ubatches)); } while (false); - return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); + return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); } -llama_memory_state_ptr llama_kv_cache_unified::init_full() { - return std::make_unique(this); +llama_memory_context_ptr llama_kv_cache_unified::init_full() { + return std::make_unique(this); } -llama_memory_state_ptr llama_kv_cache_unified::init_update(llama_context * lctx, bool optimize) { +llama_memory_context_ptr llama_kv_cache_unified::init_update(llama_context * lctx, bool optimize) { bool do_shift = get_has_shift(); defrag_info dinfo; // see if we need to defrag - { + if (n_stream == 1) { + // note : for now do not consider defrag for n_stream > 1 + const auto & cells = v_cells[seq_to_stream[0]]; + bool do_defrag = optimize; const auto thold = lctx->get_cparams().defrag_thold; @@ -367,46 +541,69 @@ llama_memory_state_ptr llama_kv_cache_unified::init_update(llama_context * lctx, } } - return std::make_unique(this, lctx, do_shift, std::move(dinfo)); + return std::make_unique(this, lctx, do_shift, std::move(dinfo), std::move(sc_info)); } -llama_kv_cache_unified::ubatch_heads llama_kv_cache_unified::prepare(const std::vector & ubatches) { - llama_kv_cache_unified::ubatch_heads res; +llama_kv_cache_unified::slot_info_vec_t llama_kv_cache_unified::prepare(const std::vector & ubatches) { + llama_kv_cache_unified::slot_info_vec_t res; + + struct state_t { + slot_info 
sinfo; // slot info for the ubatch - struct state { - uint32_t head_old; // old position of the head, before placing the ubatch - uint32_t head_new; // new position of the head, after placing the ubatch + std::vector v_heads_old; // old positions of the heads, before placing the ubatch - llama_kv_cells_unified cells; // copy of the old cells, before placing the ubatch + std::vector v_cells; // copy of the old cells, before placing the ubatch }; // remember the old state of the cells so we can restore it in the end - std::vector states; + std::vector states; bool success = true; for (const auto & ubatch : ubatches) { + // non-continuous slots require support for ggml_set_rows() + const bool cont = supports_set_rows ? false : true; + // only find a suitable slot for the ubatch. don't modify the cells yet - const int32_t head_new = find_slot(ubatch); - if (head_new < 0) { + const auto sinfo_new = find_slot(ubatch, cont); + if (sinfo_new.empty()) { success = false; break; } // remeber the position that we found - res.push_back(head_new); + res.push_back(sinfo_new); // store the old state of the cells in the recovery stack - states.push_back({head, (uint32_t) head_new, cells.cp(head_new, ubatch.n_tokens)}); + { + state_t state = { sinfo_new, v_heads, {} }; + + for (uint32_t s = 0; s < sinfo_new.n_stream(); ++s) { + auto & cells = v_cells[sinfo_new.strm[s]]; + + state.v_cells.push_back(cells.cp(sinfo_new.idxs[s])); + } + + states.push_back(std::move(state)); + } // now emplace the ubatch - apply_ubatch(head_new, ubatch); + apply_ubatch(sinfo_new, ubatch); } + GGML_ASSERT(!states.empty() || !success); + // iterate backwards and restore the cells to their original state for (auto it = states.rbegin(); it != states.rend(); ++it) { - cells.set(it->head_new, it->cells); - head = it->head_old; + const auto & sinfo = it->sinfo; + + for (uint32_t s = 0; s < sinfo.n_stream(); ++s) { + auto & cells = v_cells[sinfo.strm[s]]; + auto & head = v_heads[sinfo.strm[s]]; + + 
cells.set(sinfo.idxs[s], it->v_cells[s]); + head = it->v_heads_old[s]; + } } if (!success) { @@ -416,11 +613,38 @@ llama_kv_cache_unified::ubatch_heads llama_kv_cache_unified::prepare(const std:: return res; } -bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const defrag_info & dinfo) { +bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const defrag_info & dinfo, const stream_copy_info & sc_info) { bool updated = false; auto * sched = lctx->get_sched(); + if (!sc_info.empty()) { + assert(n_stream > 1 && "stream copy should never happen with a single stream"); + + llama_synchronize(lctx); + + const size_t n_copy = sc_info.ssrc.size(); + + for (size_t i = 0; i < n_copy; ++i) { + const auto ssrc = sc_info.ssrc[i]; + const auto sdst = sc_info.sdst[i]; + + assert(ssrc < n_stream); + assert(sdst < n_stream); + + LLAMA_LOG_DEBUG("%s: copying KV buffer: stream %d to stream %d\n", __func__, ssrc, sdst); + + assert(ssrc != sdst); + + for (uint32_t il = 0; il < layers.size(); ++il) { + const auto & layer = layers[il]; + + ggml_backend_tensor_copy(layer.k_stream[ssrc], layer.k_stream[sdst]); + ggml_backend_tensor_copy(layer.v_stream[ssrc], layer.v_stream[sdst]); + } + } + } + if (do_shift) { if (!get_can_shift()) { GGML_ABORT("The current KV cache / model configuration does not support K-shift"); @@ -432,14 +656,11 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d if (hparams.rope_type != LLAMA_ROPE_TYPE_NONE) { ggml_backend_sched_reset(sched); - auto * gf = lctx->graph_init(); + auto * res = lctx->get_gf_res_reserve(); - auto res = build_graph_shift(lctx->get_cparams(), lctx->get_ctx_compute(), gf); - if (!res) { - LLAMA_LOG_ERROR("%s: failed to build graph for K-shift\n", __func__); - return updated; - } + res->reset(); + auto * gf = build_graph_shift(res, lctx); if (!ggml_backend_sched_alloc_graph(sched, gf)) { LLAMA_LOG_ERROR("%s: failed to allocate compute graph for K-shift\n", __func__); 
return updated; @@ -455,12 +676,20 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d updated = true; } - cells.reset_shift(); + for (uint32_t s = 0; s < n_stream; ++s) { + auto & cells = v_cells[s]; + + cells.reset_shift(); + } } if (!dinfo.empty()) { LLAMA_LOG_DEBUG("%s: defragmenting KV cache\n", __func__); + // note: for now do not consider defrag for n_stream > 1 + auto & cells = v_cells[seq_to_stream[0]]; + auto & head = v_heads[seq_to_stream[0]]; + // apply moves: { const auto n_kv = dinfo.ids.size(); @@ -481,14 +710,11 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d ggml_backend_sched_reset(sched); - auto * gf = lctx->graph_init(); + auto * res = lctx->get_gf_res_reserve(); - auto res = build_graph_defrag(lctx->get_cparams(), lctx->get_ctx_compute(), gf, dinfo); - if (!res) { - LLAMA_LOG_ERROR("%s: failed to build graph for defrag\n", __func__); - return updated; - } + res->reset(); + auto * gf = build_graph_defrag(res, lctx, dinfo); if (!ggml_backend_sched_alloc_graph(sched, gf)) { LLAMA_LOG_ERROR("%s: failed to allocate compute graph for defrag\n", __func__); return updated; @@ -507,24 +733,14 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d return updated; } -int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const { - const uint32_t n_tokens = ubatch.n_tokens; - - uint32_t head_cur = this->head; +llama_kv_cache_unified::slot_info llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch, bool cont) const { + if (debug > 0) { + const auto & cells = v_cells[seq_to_stream[1]]; - // if we have enough unused cells before the current head -> - // better to start searching from the beginning of the cache, hoping to fill it - if (head_cur > cells.get_used() + 2*ubatch.n_tokens) { - head_cur = 0; - } + const uint32_t head_cur = v_heads[1]; - if (n_tokens > cells.size()) { - LLAMA_LOG_ERROR("%s: n_tokens = %d > size = %u\n", __func__, 
n_tokens, cells.size()); - return -1; - } - - if (debug > 0) { - LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n", __func__, cells.used_max_p1(), cells.get_used(), head, get_size(), n_swa); + LLAMA_LOG_DEBUG("%s: n = %5d, used = %5d, head = %5d, size = %5d, n_swa = %5d\n", + __func__, cells.used_max_p1(), cells.get_used(), head_cur, get_size(), n_swa); if ((debug == 2 && n_swa > 0) || debug > 2) { std::string ss; @@ -572,7 +788,7 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const { LLAMA_LOG_DEBUG("\n%s\n", ss.c_str()); } - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { if (cells.seq_pos_min(s) < 0) { continue; } @@ -581,102 +797,170 @@ int32_t llama_kv_cache_unified::find_slot(const llama_ubatch & ubatch) const { } } - uint32_t n_tested = 0; + uint32_t n_tokens = ubatch.n_tokens; + uint32_t n_seqs = 1; + + if (n_stream > 1) { + GGML_ASSERT(n_tokens % ubatch.n_seqs_unq == 0); + + n_seqs = ubatch.n_seqs_unq; + n_tokens = n_tokens / n_seqs; + } + + slot_info res = { + /*.s0 =*/ LLAMA_MAX_SEQ, + /*.s1 =*/ 0, + /*.strm =*/ { }, + /*.idxs =*/ { }, + }; + + res.resize(n_seqs); + + for (uint32_t s = 0; s < n_seqs; ++s) { + const auto seq_id = ubatch.seq_id_unq[s]; + + if (n_stream > 1) { + GGML_ASSERT(ubatch.n_seq_id[s*n_tokens] == 1); + GGML_ASSERT(ubatch.seq_id [s*n_tokens][0] == seq_id); + } + + res.s0 = std::min(res.s0, seq_to_stream[seq_id]); + res.s1 = std::max(res.s1, seq_to_stream[seq_id]); - while (true) { - if (head_cur + n_tokens > cells.size()) { - n_tested += cells.size() - head_cur; + res.strm[s] = seq_to_stream[seq_id]; + res.idxs[s].reserve(n_tokens); + + const auto & cells = v_cells[seq_to_stream[seq_id]]; + + uint32_t head_cur = v_heads[seq_to_stream[seq_id]]; + + // if we have enough unused cells before the current head -> + // better to start searching from the beginning of the cache, hoping to fill it + if (head_cur > cells.get_used() + 
2*n_tokens) { head_cur = 0; - continue; } - bool found = true; - for (uint32_t i = 0; i < n_tokens; i++) { - //const llama_pos pos = ubatch.pos[i]; - //const llama_seq_id seq_id = ubatch.seq_id[i][0]; + if (n_tokens > cells.size()) { + LLAMA_LOG_ERROR("%s: n_tokens = %d > size = %u\n", __func__, n_tokens, cells.size()); + return { }; + } + + uint32_t n_tested = 0; + + // for continuous slots, we test that all tokens in the ubatch fit, starting from the current head + // for non-continuous slots, we test the tokens one by one + const uint32_t n_test = cont ? n_tokens : 1; + + while (true) { + if (head_cur + n_test > cells.size()) { + n_tested += cells.size() - head_cur; + head_cur = 0; + continue; + } + + for (uint32_t i = 0; i < n_test; i++) { + const auto idx = head_cur; + + head_cur++; + n_tested++; - // can we use this cell? either: - // - the cell is empty - // - the cell is occupied only by one sequence: - // - (disabled) mask causally, if the sequence is the same as the one we are inserting - // - mask SWA, using current max pos for that sequence in the cache - // always insert in the cell with minimum pos - bool can_use = cells.is_empty(head_cur + i); + //const llama_pos pos = ubatch.pos[i]; + //const llama_seq_id seq_id = ubatch.seq_id[i][0]; - if (!can_use && cells.seq_count(head_cur + i) == 1) { - const llama_pos pos_cell = cells.pos_get(head_cur + i); + // can we use this cell? 
either: + // - the cell is empty + // - the cell is occupied only by one sequence: + // - (disabled) mask causally, if the sequence is the same as the one we are inserting + // - mask SWA, using current max pos for that sequence in the cache + // always insert in the cell with minimum pos + bool can_use = cells.is_empty(idx); - // (disabled) causal mask - // note: it's better to purge any "future" tokens beforehand - //if (cells.seq_has(head_cur + i, seq_id)) { - // can_use = pos_cell >= pos; - //} + if (!can_use && cells.seq_count(idx) == 1) { + const llama_pos pos_cell = cells.pos_get(idx); - if (!can_use) { - const llama_seq_id seq_id_cell = cells.seq_get(head_cur + i); + // (disabled) causal mask + // note: it's better to purge any "future" tokens beforehand + //if (cells.seq_has(idx, seq_id)) { + // can_use = pos_cell >= pos; + //} - // SWA mask - if (is_masked_swa(pos_cell, cells.seq_pos_max(seq_id_cell) + 1)) { - can_use = true; + if (!can_use) { + const llama_seq_id seq_id_cell = cells.seq_get(idx); + + // SWA mask + if (is_masked_swa(pos_cell, cells.seq_pos_max(seq_id_cell) + 1)) { + can_use = true; + } + } + } + + if (can_use) { + res.idxs[s].push_back(idx); + } else { + if (cont) { + break; } } } - if (!can_use) { - found = false; - head_cur += i + 1; - n_tested += i + 1; + if (res.idxs[s].size() == n_tokens) { break; } - } - if (found) { - break; + if (cont) { + res.idxs[s].clear(); + } + + if (n_tested >= cells.size()) { + //LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tokens); + return { }; + } } - if (n_tested >= cells.size()) { - //LLAMA_LOG_ERROR("%s: failed to find a slot for %d tokens\n", __func__, n_tokens); - return -1; + // we didn't find a suitable slot - return empty result + if (res.idxs[s].size() < n_tokens) { + return { }; } } - return head_cur; -} + assert(res.s1 >= res.s0); -void llama_kv_cache_unified::apply_ubatch(uint32_t head_cur, const llama_ubatch & ubatch) { - if (debug > 0) { - LLAMA_LOG_DEBUG("%s: 
ubatch info:\n", __func__); - LLAMA_LOG_DEBUG("%s: n_tokens = %d, equal_seqs = %d\n", __func__, ubatch.n_tokens, ubatch.equal_seqs); - LLAMA_LOG_DEBUG("%s: n_seq_tokens = %d, n_seqs = %d\n", __func__, ubatch.n_seq_tokens, ubatch.n_seqs); - } + return res; +} +void llama_kv_cache_unified::apply_ubatch(const slot_info & sinfo, const llama_ubatch & ubatch) { // keep track of the max sequence position that we would overwrite with this ubatch // for non-SWA cache, this would be always empty - llama_seq_id seq_pos_max_rm[LLAMA_MAX_PARALLEL_SEQUENCES]; - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + llama_seq_id seq_pos_max_rm[LLAMA_MAX_SEQ]; + for (uint32_t s = 0; s < LLAMA_MAX_SEQ; ++s) { seq_pos_max_rm[s] = -1; } - for (uint32_t s = 0; s < ubatch.n_seqs; ++s) { - for (uint32_t j = 0; j < ubatch.n_seq_tokens; ++j) { - const uint32_t idx = s*ubatch.n_seq_tokens + j; + assert(ubatch.n_tokens == sinfo.n_stream()*sinfo.size()); + + for (uint32_t s = 0; s < sinfo.n_stream(); ++s) { + for (uint32_t ii = 0; ii < sinfo.size(); ++ii) { + const uint32_t i = s*sinfo.size() + ii; - if (!cells.is_empty(head_cur + idx)) { - assert(cells.seq_count(head_cur + idx) == 1); + auto & cells = v_cells[sinfo.strm[s]]; - const llama_seq_id seq_id = cells.seq_get(head_cur + idx); - const llama_pos pos = cells.pos_get(head_cur + idx); + const auto idx = sinfo.idxs[s][ii]; + + if (!cells.is_empty(idx)) { + assert(cells.seq_count(idx) == 1); + + const llama_seq_id seq_id = cells.seq_get(idx); + const llama_pos pos = cells.pos_get(idx); seq_pos_max_rm[seq_id] = std::max(seq_pos_max_rm[seq_id], pos); - cells.rm(head_cur + idx); + cells.rm(idx); } - cells.pos_set(head_cur + idx, ubatch.pos[idx]); + cells.pos_set(idx, ubatch.pos[i]); - // TODO: fix indexing [UBATCH_IDX] - for (int32_t i = 0; i < ubatch.n_seq_id[s]; i++) { - cells.seq_add(head_cur + idx, ubatch.seq_id[s][i]); + for (int32_t s = 0; s < ubatch.n_seq_id[i]; s++) { + cells.seq_add(idx, ubatch.seq_id[i][s]); } } } @@ -684,11 
+968,15 @@ void llama_kv_cache_unified::apply_ubatch(uint32_t head_cur, const llama_ubatch // note: we want to preserve the invariant that all positions between [pos_min, pos_max] for each sequence // will be present in the cache. so we have to purge any position which is less than those we would overwrite // ref: https://github.com/ggml-org/llama.cpp/pull/13746#issuecomment-2916057092 - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + for (uint32_t s = 0; s < LLAMA_MAX_SEQ; ++s) { if (seq_pos_max_rm[s] == -1) { continue; } + GGML_ASSERT(s < seq_to_stream.size()); + + auto & cells = v_cells[seq_to_stream[s]]; + if (cells.seq_pos_min(s) <= seq_pos_max_rm[s]) { LLAMA_LOG_DEBUG("%s: purging positions [%d, %d] of sequence %d from KV cache\n", __func__, cells.seq_pos_min(s), seq_pos_max_rm[s], s); @@ -696,8 +984,13 @@ void llama_kv_cache_unified::apply_ubatch(uint32_t head_cur, const llama_ubatch seq_rm(s, cells.seq_pos_min(s), seq_pos_max_rm[s] + 1); } } + // move the head at the end of the slot - head = head_cur + ubatch.n_tokens; + for (uint32_t s = 0; s < sinfo.n_stream(); ++s) { + auto & head = v_heads[sinfo.strm[s]]; + + head = sinfo.idxs[s].back() + 1; + } } bool llama_kv_cache_unified::get_can_shift() const { @@ -705,101 +998,290 @@ bool llama_kv_cache_unified::get_can_shift() const { } uint32_t llama_kv_cache_unified::get_size() const { + const auto & cells = v_cells[seq_to_stream[0]]; + return cells.size(); } +uint32_t llama_kv_cache_unified::get_n_stream() const { + return n_stream; +} + bool llama_kv_cache_unified::get_has_shift() const { - return cells.get_has_shift(); + bool result = false; + + for (uint32_t s = 0; s < n_stream; ++s) { + result |= v_cells[s].get_has_shift(); + } + + return result; } uint32_t llama_kv_cache_unified::get_n_kv() const { - return std::min(cells.size(), std::max(n_pad, GGML_PAD(cells.used_max_p1(), n_pad))); + uint32_t result = 0; + + for (uint32_t s = 0; s < n_stream; ++s) { + const auto & cells = v_cells[s]; + + 
result = std::max(std::min(cells.size(), std::max(n_pad, GGML_PAD(cells.used_max_p1(), n_pad))), result); + } + + return result; +} + +bool llama_kv_cache_unified::get_supports_set_rows() const { + return supports_set_rows; } -ggml_tensor * llama_kv_cache_unified::get_k(ggml_context * ctx, int32_t il, uint32_t n_kv) const { +ggml_tensor * llama_kv_cache_unified::get_k(ggml_context * ctx, int32_t il, uint32_t n_kv, const slot_info & sinfo) const { const int32_t ikv = map_layer_ids.at(il); auto * k = layers[ikv].k; - return ggml_view_3d(ctx, k, - hparams.n_embd_head_k, hparams.n_head_kv(il), n_kv, + const uint64_t kv_size = get_size(); + const uint64_t n_embd_k_gqa = k->ne[0]; + + assert(n_embd_k_gqa == hparams.n_embd_k_gqa(il)); + + const uint32_t ns = sinfo.s1 - sinfo.s0 + 1; + + return ggml_view_4d(ctx, k, + hparams.n_embd_head_k, hparams.n_head_kv(il), n_kv, ns, ggml_row_size(k->type, hparams.n_embd_head_k), - ggml_row_size(k->type, hparams.n_embd_k_gqa(il)), - 0); + ggml_row_size(k->type, n_embd_k_gqa), + ggml_row_size(k->type, n_embd_k_gqa*kv_size), + ggml_row_size(k->type, n_embd_k_gqa*kv_size)*sinfo.s0); } -ggml_tensor * llama_kv_cache_unified::get_v(ggml_context * ctx, int32_t il, uint32_t n_kv) const { +ggml_tensor * llama_kv_cache_unified::get_v(ggml_context * ctx, int32_t il, uint32_t n_kv, const slot_info & sinfo) const { const int32_t ikv = map_layer_ids.at(il); auto * v = layers[ikv].v; + const uint64_t kv_size = get_size(); + const uint64_t n_embd_v_gqa = v->ne[0]; + + // [TAG_V_CACHE_VARIABLE] + assert(n_embd_v_gqa >= hparams.n_embd_v_gqa(il)); + + const uint32_t ns = sinfo.s1 - sinfo.s0 + 1; + if (!v_trans) { // note: v->nb[1] <= v->nb[2] - return ggml_view_3d(ctx, v, - hparams.n_embd_head_v, hparams.n_head_kv(il), n_kv, - ggml_row_size(v->type, hparams.n_embd_head_v), // v->nb[1] - ggml_row_size(v->type, hparams.n_embd_v_gqa(il)), // v->nb[2] - 0); + return ggml_view_4d(ctx, v, + hparams.n_embd_head_v, hparams.n_head_kv(il), n_kv, ns, + 
ggml_row_size(v->type, hparams.n_embd_head_v), // v->nb[1] + ggml_row_size(v->type, n_embd_v_gqa), // v->nb[2] + ggml_row_size(v->type, n_embd_v_gqa*kv_size), // v->nb[3] + ggml_row_size(v->type, n_embd_v_gqa*kv_size)*sinfo.s0); } // note: v->nb[1] > v->nb[2] - return ggml_view_3d(ctx, v, - n_kv, hparams.n_head_kv(il), hparams.n_embd_head_v, - ggml_row_size(v->type, v->ne[1]*hparams.n_embd_head_v), // v->nb[1] - ggml_row_size(v->type, v->ne[1]), // v->nb[2] - 0); + return ggml_view_4d(ctx, v, + n_kv, hparams.n_head_kv(il), hparams.n_embd_head_v, ns, + ggml_row_size(v->type, kv_size*hparams.n_embd_head_v), // v->nb[1] + ggml_row_size(v->type, kv_size), // v->nb[2] + ggml_row_size(v->type, kv_size*n_embd_v_gqa), // v->nb[3] + ggml_row_size(v->type, kv_size*n_embd_v_gqa)*sinfo.s0); } -ggml_tensor * llama_kv_cache_unified::cpy_k(ggml_context * ctx, ggml_tensor * k_cur, int32_t il, uint32_t head_cur) const { +ggml_tensor * llama_kv_cache_unified::cpy_k(ggml_context * ctx, ggml_tensor * k_cur, ggml_tensor * k_idxs, int32_t il, const slot_info & sinfo) const { const int32_t ikv = map_layer_ids.at(il); auto * k = layers[ikv].k; + const int64_t n_embd_k_gqa = k->ne[0]; const int64_t n_tokens = k_cur->ne[2]; + k_cur = ggml_reshape_2d(ctx, k_cur, k->ne[0], n_tokens); + + if (k_idxs && supports_set_rows) { + if (k->ne[2] > 1) { + k = ggml_reshape_2d(ctx, k, k->ne[0], k->ne[1]*k->ne[2]); + } + + return ggml_set_rows(ctx, k, k_cur, k_idxs); + } + + // TODO: fallback to old ggml_cpy() method for backwards compatibility + // will be removed when ggml_set_rows() is adopted by all backends + + GGML_ASSERT(n_stream == 1 && "n_stream > 1 not supported without LLAMA_SET_ROWS"); + ggml_tensor * k_view = ggml_view_1d(ctx, k, - n_tokens*hparams.n_embd_k_gqa(il), - ggml_row_size(k->type, hparams.n_embd_k_gqa(il))*head_cur); + n_tokens*n_embd_k_gqa, + ggml_row_size(k->type, n_embd_k_gqa)*sinfo.head()); return ggml_cpy(ctx, k_cur, k_view); } -ggml_tensor * 
llama_kv_cache_unified::cpy_v(ggml_context * ctx, ggml_tensor * v_cur, int32_t il, uint32_t head_cur) const { +ggml_tensor * llama_kv_cache_unified::cpy_v(ggml_context * ctx, ggml_tensor * v_cur, ggml_tensor * v_idxs, int32_t il, const slot_info & sinfo) const { const int32_t ikv = map_layer_ids.at(il); auto * v = layers[ikv].v; - const int64_t n_tokens = v_cur->ne[2]; + const int64_t n_embd_v_gqa = v_cur->ne[0]*v_cur->ne[1]; + const int64_t n_tokens = v_cur->ne[2]; + + v_cur = ggml_reshape_2d(ctx, v_cur, n_embd_v_gqa, n_tokens); + + if (v_idxs && supports_set_rows) { + if (!v_trans) { + if (v->ne[2] > 1) { + v = ggml_reshape_2d(ctx, v, v->ne[0], v->ne[1]*v->ne[2]); + } + + return ggml_set_rows(ctx, v, v_cur, v_idxs); + } + + // [TAG_V_CACHE_VARIABLE] + if (n_embd_v_gqa < v->ne[0]) { + v_cur = ggml_pad(ctx, v_cur, v->ne[0] - n_embd_v_gqa, 0, 0, 0); + } + + // the row becomes a single element + ggml_tensor * v_view = ggml_reshape_2d(ctx, v, 1, v->ne[0]*v->ne[1]*v->ne[2]); + + v_cur = ggml_reshape_2d(ctx, v_cur, 1, v_cur->ne[0]*v_cur->ne[1]); + + return ggml_set_rows(ctx, v_view, v_cur, v_idxs); + } + + // TODO: fallback to old ggml_cpy() method for backwards compatibility + // will be removed when ggml_set_rows() is adopted by all backends - v_cur = ggml_reshape_2d(ctx, v_cur, hparams.n_embd_v_gqa(il), n_tokens); + GGML_ASSERT(n_stream == 1 && "n_stream > 1 not supported without LLAMA_SET_ROWS"); ggml_tensor * v_view = nullptr; if (!v_trans) { v_view = ggml_view_1d(ctx, v, - n_tokens*hparams.n_embd_v_gqa(il), - ggml_row_size(v->type, hparams.n_embd_v_gqa(il))*head_cur); + n_tokens*n_embd_v_gqa, + ggml_row_size(v->type, n_embd_v_gqa)*sinfo.head()); } else { - // note: the V cache is transposed when not using flash attention - v_view = ggml_view_2d(ctx, v, n_tokens, hparams.n_embd_v_gqa(il), - (v->ne[1])*ggml_element_size(v), - (head_cur)*ggml_element_size(v)); - v_cur = ggml_transpose(ctx, v_cur); + + v_view = ggml_view_2d(ctx, v, n_tokens, n_embd_v_gqa, + (v->ne[1] 
)*ggml_element_size(v), + (sinfo.head())*ggml_element_size(v)); } return ggml_cpy(ctx, v_cur, v_view); } +ggml_tensor * llama_kv_cache_unified::build_input_k_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const { + const uint32_t n_tokens = ubatch.n_tokens; + + ggml_tensor * k_idxs = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, n_tokens); + + ggml_set_input(k_idxs); + + return k_idxs; +} + +ggml_tensor * llama_kv_cache_unified::build_input_v_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const { + const uint32_t n_tokens = ubatch.n_tokens; + + ggml_tensor * v_idxs; + + if (!v_trans) { + v_idxs = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, n_tokens); + } else { + v_idxs = ggml_new_tensor_1d(ctx, GGML_TYPE_I64, n_tokens*hparams.n_embd_v_gqa_max()); + } + + ggml_set_input(v_idxs); + + return v_idxs; +} + +void llama_kv_cache_unified::set_input_k_idxs(ggml_tensor * dst, const llama_ubatch * ubatch, const slot_info & sinfo) const { + if (!supports_set_rows) { + return; + } + + const uint32_t n_tokens = ubatch->n_tokens; + GGML_ASSERT(n_tokens == (int64_t) sinfo.size()*sinfo.n_stream()); + + GGML_ASSERT(ggml_backend_buffer_is_host(dst->buffer)); + int64_t * data = (int64_t *) dst->data; + + for (uint32_t s = 0; s < sinfo.n_stream(); ++s) { + const int64_t offs = sinfo.strm[s]*get_size(); + + for (uint32_t i = 0; i < sinfo.size(); ++i) { + data[s*sinfo.size() + i] = offs + sinfo.idxs[s][i]; + } + } +} + +void llama_kv_cache_unified::set_input_v_idxs(ggml_tensor * dst, const llama_ubatch * ubatch, const slot_info & sinfo) const { + if (!supports_set_rows) { + return; + } + + const uint32_t n_tokens = ubatch->n_tokens; + GGML_ASSERT(n_tokens == (int64_t) sinfo.size()*sinfo.n_stream()); + + GGML_ASSERT(ggml_backend_buffer_is_host(dst->buffer)); + int64_t * data = (int64_t *) dst->data; + + if (!v_trans) { + for (uint32_t s = 0; s < sinfo.n_stream(); ++s) { + const int64_t offs = sinfo.strm[s]*get_size(); + + for (uint32_t i = 0; i < sinfo.size(); ++i) { + 
data[s*sinfo.size() + i] = offs + sinfo.idxs[s][i]; + } + } + } else { + // note: the V cache is transposed when not using flash attention + const int64_t kv_size = get_size(); + + const int64_t n_embd_v_gqa = hparams.n_embd_v_gqa_max(); + + for (uint32_t s = 0; s < sinfo.n_stream(); ++s) { + const int64_t offs = sinfo.strm[s]*kv_size*n_embd_v_gqa; + + for (uint32_t i = 0; i < sinfo.size(); ++i) { + for (uint32_t j = 0; j < n_embd_v_gqa; ++j) { + data[s*sinfo.size()*n_embd_v_gqa + i*n_embd_v_gqa + j] = offs + j*kv_size + sinfo.idxs[s][i]; + } + } + } + } +} + +void llama_kv_cache_unified::set_input_k_shift(ggml_tensor * dst) const { + GGML_ASSERT(ggml_backend_buffer_is_host(dst->buffer)); + + int32_t * data = (int32_t *) dst->data; + + for (uint32_t s = 0; s < n_stream; ++s) { + const auto & cells = v_cells[s]; + + for (uint32_t i = 0; i < cells.size(); ++i) { + data[s*cells.size() + i] = cells.is_empty(i) ? 0 : cells.get_shift(i); + } + } +} + void llama_kv_cache_unified::set_input_kq_mask(ggml_tensor * dst, const llama_ubatch * ubatch, bool causal_attn) const { - const uint32_t n_tokens = ubatch->n_tokens; - const uint32_t n_seq_tokens = ubatch->n_seq_tokens; - const uint32_t n_seqs = ubatch->n_seqs; + const uint32_t n_tokens = ubatch->n_tokens; GGML_ASSERT(ggml_backend_buffer_is_host(dst->buffer)); float * data = (float *) dst->data; - const int64_t n_kv = dst->ne[0]; + const int64_t n_kv = dst->ne[0]; + const int64_t n_stream = dst->ne[3]; // num streams in the current ubatch + + GGML_ASSERT(n_tokens%n_stream == 0); + + // n_tps == n_tokens_per_stream + const int64_t n_tps = n_tokens/n_stream; + const int64_t n_tps_pad = GGML_PAD(n_tps, GGML_KQ_MASK_PAD); + + std::fill(data, data + ggml_nelements(dst), -INFINITY); // Use only the previous KV cells of the correct sequence for each token of the ubatch. // It's assumed that if a token in the batch has multiple sequences, they are equivalent. 
@@ -813,86 +1295,69 @@ void llama_kv_cache_unified::set_input_kq_mask(ggml_tensor * dst, const llama_ub // xxxxx----- // xxxxx----- // To visualize the mask, see https://github.com/ggml-org/llama.cpp/pull/12615 + // TODO: optimize this section for (uint32_t h = 0; h < 1; ++h) { - for (uint32_t s = 0; s < n_seqs; ++s) { - const llama_seq_id seq_id = ubatch->seq_id[s][0]; - - for (uint32_t j = 0; j < n_seq_tokens; ++j) { - const uint32_t idx = s*n_seq_tokens + j; + for (uint32_t s = 0; s < n_stream; ++s) { + for (uint32_t ii = 0; ii < n_tps; ++ii) { + const uint32_t i = s*n_tps + ii; - const llama_pos p1 = ubatch->pos[idx]; + const llama_seq_id seq_id = ubatch->seq_id[i][0]; - for (uint32_t i = 0; i < n_kv; ++i) { - float f = 0.0f; + const auto & cells = v_cells[seq_to_stream[seq_id]]; - bool masked = false; + const llama_pos p1 = ubatch->pos[i]; - if (cells.is_empty(i)) { - masked = true; - } else { - const llama_pos p0 = cells.pos_get(i); + const uint64_t idst = n_kv*(h*n_stream*n_tps_pad + s*n_tps_pad + ii); - // mask the token if not the same sequence - masked = masked || (!cells.seq_has(i, seq_id)); + for (uint32_t j = 0; j < n_kv; ++j) { + if (cells.is_empty(j)) { + continue; + } - // mask future tokens - masked = masked || (causal_attn && p0 > p1); + // mask the token if not the same sequence + if (!cells.seq_has(j, seq_id)) { + continue; + } - // apply SWA if any - masked = masked || (is_masked_swa(p0, p1)); + const llama_pos p0 = cells.pos_get(j); - if (!masked && hparams.use_alibi) { - f = -std::abs(p0 - p1); - } + // mask future tokens + if (causal_attn && p0 > p1) { + continue; } - if (masked) { - f = -INFINITY; + // apply SWA if any + if (is_masked_swa(p0, p1)) { + continue; } - data[h*(n_kv*n_tokens) + idx*n_kv + i] = f; - } - } - } - - // mask padded tokens - if (data) { - for (uint32_t j = n_tokens; j < GGML_PAD(n_tokens, GGML_KQ_MASK_PAD); ++j) { - for (uint32_t i = 0; i < n_kv; ++i) { - data[h*(n_kv*n_tokens) + j*n_kv + i] = -INFINITY; + data[idst + 
j] = hparams.use_alibi ? -std::abs(p0 - p1) : 0.0f; } } } } } -void llama_kv_cache_unified::set_input_k_shift(ggml_tensor * dst) const { - GGML_ASSERT(ggml_backend_buffer_is_host(dst->buffer)); - - int32_t * data = (int32_t *) dst->data; - - for (uint32_t i = 0; i < cells.size(); ++i) { - data[i] = cells.is_empty(i) ? 0 : cells.get_shift(i); - } -} - void llama_kv_cache_unified::set_input_pos_bucket(ggml_tensor * dst, const llama_ubatch * ubatch) const { const int64_t n_tokens = ubatch->n_tokens; + GGML_ASSERT(n_stream == 1 && "TODO: support multiple streams"); + const auto & cells = v_cells[0]; + GGML_ASSERT(ggml_backend_buffer_is_host(dst->buffer)); - GGML_ASSERT(!ubatch->equal_seqs); // TODO: use ubatch->n_seqs instead of failing + GGML_ASSERT(!ubatch->equal_seqs()); // TODO: use ubatch->n_seqs instead of failing int32_t * data = (int32_t *) dst->data; const int32_t n_kv = dst->ne[0]; for (int h = 0; h < 1; ++h) { - for (int j = 0; j < n_tokens; ++j) { - for (int i = 0; i < n_kv; ++i) { + for (int i = 0; i < n_tokens; ++i) { + for (int j = 0; j < n_kv; ++j) { // the position when the cells is empty is irrelevant - it will be masked out later in the attention - const llama_pos p0 = cells.is_empty(i) ? -1 : cells.pos_get(i); + const llama_pos p0 = cells.is_empty(j) ? 
-1 : cells.pos_get(j); - data[h*(n_kv*n_tokens) + j*n_kv + i] = llama_relative_position_bucket(p0, ubatch->pos[j], hparams.n_rel_attn_bkts, false); + data[h*(n_kv*n_tokens) + i*n_kv + j] = llama_relative_position_bucket(p0, ubatch->pos[i], hparams.n_rel_attn_bkts, false); } } } @@ -985,7 +1450,7 @@ class llm_graph_input_k_shift : public llm_graph_input_i { void set_input(const llama_ubatch * ubatch) override; - ggml_tensor * k_shift; // I32 [kv_size] + ggml_tensor * k_shift; // I32 [kv_size*n_stream] const llama_kv_cache_unified * kv_self; }; @@ -998,20 +1463,20 @@ void llm_graph_input_k_shift::set_input(const llama_ubatch * ubatch) { } } -llm_graph_result_ptr llama_kv_cache_unified::build_graph_shift( - const llama_cparams & cparams, - ggml_context * ctx, - ggml_cgraph * gf) const { - auto res = std::make_unique(); +ggml_cgraph * llama_kv_cache_unified::build_graph_shift(llm_graph_result * res, llama_context * lctx) const { + auto * ctx = res->get_ctx(); + auto * gf = res->get_gf(); const auto & n_embd_head_k = hparams.n_embd_head_k; //const auto & n_embd_head_v = hparams.n_embd_head_v; auto inp = std::make_unique(this); - inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, cells.size()); + inp->k_shift = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, (int64_t) get_size()*n_stream); ggml_set_input(inp->k_shift); + const auto & cparams = lctx->get_cparams(); + for (const auto & layer : layers) { const uint32_t il = layer.il; @@ -1025,7 +1490,7 @@ llm_graph_result_ptr llama_kv_cache_unified::build_graph_shift( ggml_tensor * k = ggml_view_3d(ctx, layer.k, - n_embd_head_k, n_head_kv, cells.size(), + n_embd_head_k, n_head_kv, get_size()*n_stream, ggml_row_size(layer.k->type, n_embd_head_k), ggml_row_size(layer.k->type, n_embd_k_gqa), 0); @@ -1037,18 +1502,24 @@ llm_graph_result_ptr llama_kv_cache_unified::build_graph_shift( res->add_input(std::move(inp)); - return res; + return gf; } -llm_graph_result_ptr llama_kv_cache_unified::build_graph_defrag( - const llama_cparams & 
cparams, - ggml_context * ctx, - ggml_cgraph * gf, - const defrag_info & dinfo) const { - auto res = std::make_unique(); +ggml_cgraph * llama_kv_cache_unified::build_graph_defrag( + llm_graph_result * res, + llama_context * lctx, + const defrag_info & dinfo) const { + auto * ctx = res->get_ctx(); + auto * gf = res->get_gf(); + + GGML_ASSERT(n_stream == 1 && "n_stream > 1 does not support defrag"); + + const auto & cells = v_cells[0]; const auto & ids = dinfo.ids; + const auto & cparams = lctx->get_cparams(); + #if 0 // CPU defrag // @@ -1185,10 +1656,14 @@ llm_graph_result_ptr llama_kv_cache_unified::build_graph_defrag( //LLAMA_LOG_INFO("gf->n_nodes = %d\n", gf->n_nodes); #endif - return res; + return gf; } llama_kv_cache_unified::defrag_info llama_kv_cache_unified::defrag_prepare(int32_t n_max_nodes) const { + GGML_ASSERT(n_stream == 1 && "n_stream > 1 does not support defrag"); + + const auto & cells = v_cells[0]; + const uint32_t n_layer = layers.size(); const uint32_t n_kv = cells.used_max_p1(); @@ -1334,64 +1809,94 @@ bool llama_kv_cache_unified::is_masked_swa(llama_pos p0, llama_pos p1) const { } void llama_kv_cache_unified::state_write(llama_io_write_i & io, llama_seq_id seq_id) const { - std::vector> cell_ranges; // ranges, from inclusive, to exclusive - uint32_t cell_count = 0; + io.write(&n_stream, sizeof(n_stream)); - // Count the number of cells with the specified seq_id - // Find all the ranges of cells with this seq id (or all, when -1) - uint32_t cell_range_begin = cells.size(); + for (uint32_t s = 0; s < n_stream; ++s) { + cell_ranges_t cr { s, {} }; - for (uint32_t i = 0; i < cells.size(); ++i) { - if (!cells.is_empty(i) && (seq_id == -1 || cells.seq_has(i, seq_id))) { - ++cell_count; - if (cell_range_begin == cells.size()) { - cell_range_begin = i; - } - } else { - if (cell_range_begin != cells.size()) { - cell_ranges.emplace_back(cell_range_begin, i); - cell_range_begin = cells.size(); + uint32_t cell_count = 0; + + const auto & cells = 
v_cells[s]; + + // Count the number of cells with the specified seq_id + // Find all the ranges of cells with this seq id (or all, when -1) + uint32_t cell_range_begin = cells.size(); + + for (uint32_t i = 0; i < cells.size(); ++i) { + if (!cells.is_empty(i) && (seq_id == -1 || cells.seq_has(i, seq_id))) { + ++cell_count; + if (cell_range_begin == cells.size()) { + cell_range_begin = i; + } + } else { + if (cell_range_begin != cells.size()) { + cr.data.emplace_back(cell_range_begin, i); + cell_range_begin = cells.size(); + } } } - } - if (cell_range_begin != cells.size()) { - cell_ranges.emplace_back(cell_range_begin, cells.size()); - } + if (cell_range_begin != cells.size()) { + cr.data.emplace_back(cell_range_begin, cells.size()); + } - // DEBUG CHECK: Sum of cell counts in ranges should equal the total cell count - uint32_t cell_count_check = 0; - for (const auto & range : cell_ranges) { - cell_count_check += range.second - range.first; - } - GGML_ASSERT(cell_count == cell_count_check); + // DEBUG CHECK: Sum of cell counts in ranges should equal the total cell count + uint32_t cell_count_check = 0; + for (const auto & range : cr.data) { + cell_count_check += range.second - range.first; + } + GGML_ASSERT(cell_count == cell_count_check); + + io.write(&cell_count, sizeof(cell_count)); - io.write(&cell_count, sizeof(cell_count)); + // skip empty streams + if (cell_count == 0) { + continue; + } - state_write_meta(io, cell_ranges, seq_id); - state_write_data(io, cell_ranges); + state_write_meta(io, cr, seq_id); + state_write_data(io, cr); + } } void llama_kv_cache_unified::state_read(llama_io_read_i & io, llama_seq_id seq_id) { - uint32_t cell_count; - io.read_to(&cell_count, sizeof(cell_count)); + GGML_ASSERT(seq_id == -1 || (seq_id >= 0 && (size_t) seq_id < seq_to_stream.size())); + + uint32_t n_stream_cur; + io.read_to(&n_stream_cur, sizeof(n_stream_cur)); + if (n_stream_cur != n_stream) { + throw std::runtime_error("n_stream mismatch"); + } - bool res = true; - 
res = res && state_read_meta(io, cell_count, seq_id); - res = res && state_read_data(io, cell_count); + for (uint32_t s = 0; s < n_stream; ++s) { + uint32_t cell_count; + io.read_to(&cell_count, sizeof(cell_count)); - if (!res) { - if (seq_id == -1) { - clear(true); - } else { - seq_rm(seq_id, -1, -1); + if (cell_count == 0) { + continue; + } + + const uint32_t strm = seq_id == -1 ? s : seq_to_stream[seq_id]; + + bool res = true; + res = res && state_read_meta(io, strm, cell_count, seq_id); + res = res && state_read_data(io, strm, cell_count); + + if (!res) { + if (seq_id == -1) { + clear(true); + } else { + seq_rm(seq_id, -1, -1); + } + throw std::runtime_error("failed to restore kv cache"); } - throw std::runtime_error("failed to restore kv cache"); } } -void llama_kv_cache_unified::state_write_meta(llama_io_write_i & io, const std::vector> & cell_ranges, llama_seq_id seq_id) const { - for (const auto & range : cell_ranges) { +void llama_kv_cache_unified::state_write_meta(llama_io_write_i & io, const cell_ranges_t & cr, llama_seq_id seq_id) const { + const auto & cells = v_cells[cr.strm]; + + for (const auto & range : cr.data) { for (uint32_t i = range.first; i < range.second; ++i) { std::vector seq_ids; @@ -1416,7 +1921,9 @@ void llama_kv_cache_unified::state_write_meta(llama_io_write_i & io, const std:: } } -void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std::vector> & cell_ranges) const { +void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const cell_ranges_t & cr) const { + const auto & cells = v_cells[cr.strm]; + const uint32_t v_trans = this->v_trans ? 
1 : 0; const uint32_t n_layer = layers.size(); @@ -1430,21 +1937,23 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std:: for (const auto & layer : layers) { const uint32_t il = layer.il; - const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(); + const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il); + + auto * k = layer.k_stream[cr.strm]; // Write key type - const int32_t k_type_i = (int32_t)layer.k->type; + const int32_t k_type_i = (int32_t) k->type; io.write(&k_type_i, sizeof(k_type_i)); // Write row size of key - const uint64_t k_size_row = ggml_row_size(layer.k->type, n_embd_k_gqa); + const uint64_t k_size_row = ggml_row_size(k->type, n_embd_k_gqa); io.write(&k_size_row, sizeof(k_size_row)); // Read each range of cells of k_size length each into tmp_buf and write out - for (const auto & range : cell_ranges) { + for (const auto & range : cr.data) { const size_t range_size = range.second - range.first; const size_t buf_size = range_size * k_size_row; - io.write_tensor(layer.k, range.first * k_size_row, buf_size); + io.write_tensor(k, range.first * k_size_row, buf_size); } } @@ -1452,21 +1961,23 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std:: for (const auto & layer : layers) { const uint32_t il = layer.il; - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); + const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il); + + auto * v = layer.v_stream[cr.strm]; // Write value type - const int32_t v_type_i = (int32_t)layer.v->type; + const int32_t v_type_i = (int32_t) v->type; io.write(&v_type_i, sizeof(v_type_i)); // Write row size of value - const uint64_t v_size_row = ggml_row_size(layer.v->type, n_embd_v_gqa); + const uint64_t v_size_row = ggml_row_size(v->type, n_embd_v_gqa); io.write(&v_size_row, sizeof(v_size_row)); // Read each range of cells of v_size length each into tmp_buf and write out - for (const auto & range : cell_ranges) { + for (const 
auto & range : cr.data) { const size_t range_size = range.second - range.first; const size_t buf_size = range_size * v_size_row; - io.write_tensor(layer.v, range.first * v_size_row, buf_size); + io.write_tensor(v, range.first * v_size_row, buf_size); } } } else { @@ -1476,14 +1987,16 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std:: for (const auto & layer : layers) { const uint32_t il = layer.il; - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); + const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il); + + auto * v = layer.v_stream[cr.strm]; // Write value type - const int32_t v_type_i = (int32_t)layer.v->type; + const int32_t v_type_i = (int32_t) v->type; io.write(&v_type_i, sizeof(v_type_i)); // Write element size - const uint32_t v_size_el = ggml_type_size(layer.v->type); + const uint32_t v_size_el = ggml_type_size(v->type); io.write(&v_size_el, sizeof(v_size_el)); // Write GQA embedding size @@ -1492,29 +2005,30 @@ void llama_kv_cache_unified::state_write_data(llama_io_write_i & io, const std:: // For each row, we get the element values of each cell for (uint32_t j = 0; j < n_embd_v_gqa; ++j) { // Read each range of cells of v_size_el length each into tmp_buf and write out - for (const auto & range : cell_ranges) { + for (const auto & range : cr.data) { const size_t range_size = range.second - range.first; const size_t src_offset = (range.first + j * kv_size) * v_size_el; const size_t buf_size = range_size * v_size_el; - io.write_tensor(layer.v, src_offset, buf_size); + io.write_tensor(v, src_offset, buf_size); } } } } } -bool llama_kv_cache_unified::state_read_meta(llama_io_read_i & io, uint32_t cell_count, llama_seq_id dest_seq_id) { +bool llama_kv_cache_unified::state_read_meta(llama_io_read_i & io, uint32_t strm, uint32_t cell_count, llama_seq_id dest_seq_id) { + auto & cells = v_cells[strm]; + auto & head = v_heads[strm]; + if (dest_seq_id != -1) { // single sequence - seq_rm(dest_seq_id, 
-1, -1); - llama_sbatch sbatch; - llama_ubatch ubatch = sbatch.reserve_ubatch(cell_count, /* has_embd */ false); + llama_batch_allocr balloc(hparams.n_pos_per_embd()); + + llama_ubatch ubatch = balloc.ubatch_reserve(cell_count, 1); - ubatch.n_tokens = cell_count; - ubatch.n_seq_tokens = cell_count; - ubatch.n_seqs = 1; + ubatch.seq_id_unq[0] = dest_seq_id; for (uint32_t i = 0; i < cell_count; ++i) { llama_pos pos; @@ -1539,17 +2053,21 @@ bool llama_kv_cache_unified::state_read_meta(llama_io_read_i & io, uint32_t cell ubatch.seq_id[i] = &dest_seq_id; } - const auto head_cur = find_slot(ubatch); - if (head_cur < 0) { + const auto sinfo = find_slot(ubatch, true); + if (sinfo.empty()) { LLAMA_LOG_ERROR("%s: failed to find available cells in kv cache\n", __func__); return false; } - apply_ubatch(head_cur, ubatch); + apply_ubatch(sinfo, ubatch); + + const auto head_cur = sinfo.head(); // keep the head at the old position because we will read the KV data into it in state_read_data() head = head_cur; + LLAMA_LOG_DEBUG("%s: head_cur = %d, head = %d, cell_count = %d, dest_seq_id = %d\n", __func__, head_cur, head, cell_count, dest_seq_id); + // DEBUG CHECK: head_cur should be our first cell, head_cur + cell_count - 1 should be our last cell (verify seq_id and pos values) // Assume that this is one contiguous block of cells GGML_ASSERT(head_cur + cell_count <= cells.size()); @@ -1595,7 +2113,10 @@ bool llama_kv_cache_unified::state_read_meta(llama_io_read_i & io, uint32_t cell return true; } -bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell_count) { +bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t strm, uint32_t cell_count) { + auto & cells = v_cells[strm]; + auto & head = v_heads[strm]; + uint32_t v_trans; uint32_t n_layer; @@ -1621,12 +2142,14 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell for (const auto & layer : layers) { const uint32_t il = layer.il; - const uint32_t 
n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(); + const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il); + + auto * k = layer.k_stream[strm]; // Read type of key int32_t k_type_i_ref; io.read_to(&k_type_i_ref, sizeof(k_type_i_ref)); - const int32_t k_type_i = (int32_t) layer.k->type; + const int32_t k_type_i = (int32_t) k->type; if (k_type_i != k_type_i_ref) { LLAMA_LOG_ERROR("%s: mismatched key type (%d != %d, layer %d)\n", __func__, k_type_i, k_type_i_ref, il); return false; @@ -1635,7 +2158,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell // Read row size of key uint64_t k_size_row_ref; io.read_to(&k_size_row_ref, sizeof(k_size_row_ref)); - const size_t k_size_row = ggml_row_size(layer.k->type, n_embd_k_gqa); + const size_t k_size_row = ggml_row_size(k->type, n_embd_k_gqa); if (k_size_row != k_size_row_ref) { LLAMA_LOG_ERROR("%s: mismatched key row size (%zu != %zu, layer %d)\n", __func__, k_size_row, (size_t) k_size_row_ref, il); return false; @@ -1643,7 +2166,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell if (cell_count) { // Read and set the keys for the whole cell range - ggml_backend_tensor_set(layer.k, io.read(cell_count * k_size_row), head * k_size_row, cell_count * k_size_row); + ggml_backend_tensor_set(k, io.read(cell_count * k_size_row), head * k_size_row, cell_count * k_size_row); } } @@ -1651,12 +2174,14 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell for (const auto & layer : layers) { const uint32_t il = layer.il; - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); + const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il); + + auto * v = layer.v_stream[strm]; // Read type of value int32_t v_type_i_ref; io.read_to(&v_type_i_ref, sizeof(v_type_i_ref)); - const int32_t v_type_i = (int32_t)layer.v->type; + const int32_t v_type_i = (int32_t) v->type; if (v_type_i != v_type_i_ref) { 
LLAMA_LOG_ERROR("%s: mismatched value type (%d != %d, layer %d)\n", __func__, v_type_i, v_type_i_ref, il); return false; @@ -1665,7 +2190,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell // Read row size of value uint64_t v_size_row_ref; io.read_to(&v_size_row_ref, sizeof(v_size_row_ref)); - const size_t v_size_row = ggml_row_size(layer.v->type, n_embd_v_gqa); + const size_t v_size_row = ggml_row_size(v->type, n_embd_v_gqa); if (v_size_row != v_size_row_ref) { LLAMA_LOG_ERROR("%s: mismatched value row size (%zu != %zu, layer %d)\n", __func__, v_size_row, (size_t) v_size_row_ref, il); return false; @@ -1673,7 +2198,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell if (cell_count) { // Read and set the values for the whole cell range - ggml_backend_tensor_set(layer.v, io.read(cell_count * v_size_row), head * v_size_row, cell_count * v_size_row); + ggml_backend_tensor_set(v, io.read(cell_count * v_size_row), head * v_size_row, cell_count * v_size_row); } } } else { @@ -1681,12 +2206,14 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell for (const auto & layer : layers) { const uint32_t il = layer.il; - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); + const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il); + + auto * v = layer.v_stream[strm]; // Read type of value int32_t v_type_i_ref; io.read_to(&v_type_i_ref, sizeof(v_type_i_ref)); - const int32_t v_type_i = (int32_t)layer.v->type; + const int32_t v_type_i = (int32_t) v->type; if (v_type_i != v_type_i_ref) { LLAMA_LOG_ERROR("%s: mismatched value type (%d != %d, layer %d)\n", __func__, v_type_i, v_type_i_ref, il); return false; @@ -1695,7 +2222,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell // Read element size of value uint32_t v_size_el_ref; io.read_to(&v_size_el_ref, sizeof(v_size_el_ref)); - const size_t v_size_el = 
ggml_type_size(layer.v->type); + const size_t v_size_el = ggml_type_size(v->type); if (v_size_el != v_size_el_ref) { LLAMA_LOG_ERROR("%s: mismatched value element size (%zu != %zu, layer %d)\n", __func__, v_size_el, (size_t) v_size_el_ref, il); return false; @@ -1713,7 +2240,7 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell // For each row in the transposed matrix, read the values for the whole cell range for (uint32_t j = 0; j < n_embd_v_gqa; ++j) { const size_t dst_offset = (head + j * cells.size()) * v_size_el; - ggml_backend_tensor_set(layer.v, io.read(cell_count * v_size_el), dst_offset, cell_count * v_size_el); + ggml_backend_tensor_set(v, io.read(cell_count * v_size_el), dst_offset, cell_count * v_size_el); } } } @@ -1723,109 +2250,133 @@ bool llama_kv_cache_unified::state_read_data(llama_io_read_i & io, uint32_t cell } // -// llama_kv_cache_unified_state +// llama_kv_cache_unified_context // -llama_kv_cache_unified_state::llama_kv_cache_unified_state(llama_memory_status status) : status(status) {} +llama_kv_cache_unified_context::llama_kv_cache_unified_context(llama_memory_status status) : status(status) {} -llama_kv_cache_unified_state::llama_kv_cache_unified_state( +llama_kv_cache_unified_context::llama_kv_cache_unified_context( llama_kv_cache_unified * kv) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv) { n_kv = kv->get_size(); - head = 0; + + const uint32_t n_stream = kv->get_n_stream(); + + // create a dummy slot info - the actual data is irrelevant. 
we just need to build the graph + sinfos.resize(1); + sinfos[0].s0 = 0; + sinfos[0].s1 = n_stream - 1; + sinfos[0].idxs.resize(n_stream); + for (uint32_t s = 0; s < n_stream; ++s) { + sinfos[0].strm.push_back(s); + sinfos[0].idxs[s].resize(1, 0); + } } -llama_kv_cache_unified_state::llama_kv_cache_unified_state( +llama_kv_cache_unified_context::llama_kv_cache_unified_context( llama_kv_cache_unified * kv, llama_context * lctx, bool do_shift, - defrag_info dinfo) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv), lctx(lctx), do_shift(do_shift), dinfo(std::move(dinfo)) { - if (!do_shift && dinfo.empty()) { + defrag_info dinfo, + stream_copy_info sc_info) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv), lctx(lctx), do_shift(do_shift), dinfo(std::move(dinfo)), sc_info(std::move(sc_info)) { + if (!do_shift && this->dinfo.empty() && this->sc_info.empty()) { status = LLAMA_MEMORY_STATUS_NO_UPDATE; } } -llama_kv_cache_unified_state::llama_kv_cache_unified_state( +llama_kv_cache_unified_context::llama_kv_cache_unified_context( llama_kv_cache_unified * kv, - llama_sbatch sbatch, - llama_kv_cache_unified::ubatch_heads heads, - std::vector ubatches) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv), sbatch(std::move(sbatch)), heads(std::move(heads)), ubatches(std::move(ubatches)) { + llama_kv_cache_unified::slot_info_vec_t sinfos, + std::vector ubatches) : status(LLAMA_MEMORY_STATUS_SUCCESS), kv(kv), sinfos(std::move(sinfos)), ubatches(std::move(ubatches)) { } -llama_kv_cache_unified_state::~llama_kv_cache_unified_state() = default; +llama_kv_cache_unified_context::~llama_kv_cache_unified_context() = default; -bool llama_kv_cache_unified_state::next() { +bool llama_kv_cache_unified_context::next() { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); - if (++i_next >= ubatches.size()) { + if (++i_cur >= ubatches.size()) { return false; } return true; } -bool llama_kv_cache_unified_state::apply() { - assert(status == LLAMA_MEMORY_STATUS_SUCCESS); +bool llama_kv_cache_unified_context::apply() { 
+ assert(!llama_memory_status_is_fail(status)); // no ubatches -> this is a KV cache update if (ubatches.empty()) { - kv->update(lctx, do_shift, dinfo); + kv->update(lctx, do_shift, dinfo, sc_info); return true; } - kv->apply_ubatch(heads[i_next], ubatches[i_next]); + kv->apply_ubatch(sinfos[i_cur], ubatches[i_cur]); n_kv = kv->get_n_kv(); - head = heads[i_next]; return true; } -std::vector & llama_kv_cache_unified_state::out_ids() { +llama_memory_status llama_kv_cache_unified_context::get_status() const { + return status; +} + +const llama_ubatch & llama_kv_cache_unified_context::get_ubatch() const { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); - return sbatch.out_ids; + return ubatches[i_cur]; } -llama_memory_status llama_kv_cache_unified_state::get_status() const { - return status; +uint32_t llama_kv_cache_unified_context::get_n_kv() const { + return n_kv; } -const llama_ubatch & llama_kv_cache_unified_state::get_ubatch() const { - assert(status == LLAMA_MEMORY_STATUS_SUCCESS); +bool llama_kv_cache_unified_context::get_supports_set_rows() const { + return kv->get_supports_set_rows(); +} - return ubatches[i_next]; +ggml_tensor * llama_kv_cache_unified_context::get_k(ggml_context * ctx, int32_t il) const { + return kv->get_k(ctx, il, n_kv, sinfos[i_cur]); } -uint32_t llama_kv_cache_unified_state::get_n_kv() const { - return n_kv; +ggml_tensor * llama_kv_cache_unified_context::get_v(ggml_context * ctx, int32_t il) const { + return kv->get_v(ctx, il, n_kv, sinfos[i_cur]); } -ggml_tensor * llama_kv_cache_unified_state::get_k(ggml_context * ctx, int32_t il) const { - return kv->get_k(ctx, il, n_kv); +ggml_tensor * llama_kv_cache_unified_context::cpy_k(ggml_context * ctx, ggml_tensor * k_cur, ggml_tensor * k_idxs, int32_t il) const { + return kv->cpy_k(ctx, k_cur, k_idxs, il, sinfos[i_cur]); } -ggml_tensor * llama_kv_cache_unified_state::get_v(ggml_context * ctx, int32_t il) const { - return kv->get_v(ctx, il, n_kv); +ggml_tensor * 
llama_kv_cache_unified_context::cpy_v(ggml_context * ctx, ggml_tensor * v_cur, ggml_tensor * v_idxs, int32_t il) const { + return kv->cpy_v(ctx, v_cur, v_idxs, il, sinfos[i_cur]); } -ggml_tensor * llama_kv_cache_unified_state::cpy_k(ggml_context * ctx, ggml_tensor * k_cur, int32_t il) const { - return kv->cpy_k(ctx, k_cur, il, head); +ggml_tensor * llama_kv_cache_unified_context::build_input_k_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const { + return kv->build_input_k_idxs(ctx, ubatch); } -ggml_tensor * llama_kv_cache_unified_state::cpy_v(ggml_context * ctx, ggml_tensor * v_cur, int32_t il) const { - return kv->cpy_v(ctx, v_cur, il, head); +ggml_tensor * llama_kv_cache_unified_context::build_input_v_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const { + return kv->build_input_v_idxs(ctx, ubatch); } -void llama_kv_cache_unified_state::set_input_k_shift(ggml_tensor * dst) const { +void llama_kv_cache_unified_context::set_input_k_shift(ggml_tensor * dst) const { kv->set_input_k_shift(dst); } -void llama_kv_cache_unified_state::set_input_kq_mask(ggml_tensor * dst, const llama_ubatch * ubatch, bool causal_attn) const { +void llama_kv_cache_unified_context::set_input_k_idxs(ggml_tensor * dst, const llama_ubatch * ubatch) const { + kv->set_input_k_idxs(dst, ubatch, sinfos[i_cur]); +} + +void llama_kv_cache_unified_context::set_input_v_idxs(ggml_tensor * dst, const llama_ubatch * ubatch) const { + kv->set_input_v_idxs(dst, ubatch, sinfos[i_cur]); +} + +void llama_kv_cache_unified_context::set_input_kq_mask(ggml_tensor * dst, const llama_ubatch * ubatch, bool causal_attn) const { kv->set_input_kq_mask(dst, ubatch, causal_attn); } -void llama_kv_cache_unified_state::set_input_pos_bucket(ggml_tensor * dst, const llama_ubatch * ubatch) const { +void llama_kv_cache_unified_context::set_input_pos_bucket(ggml_tensor * dst, const llama_ubatch * ubatch) const { kv->set_input_pos_bucket(dst, ubatch); } diff --git a/src/llama-kv-cache-unified.h 
b/src/llama-kv-cache-unified.h index d6dcd19f2507e..3e28e346c3fcf 100644 --- a/src/llama-kv-cache-unified.h +++ b/src/llama-kv-cache-unified.h @@ -24,8 +24,6 @@ class llama_kv_cache_unified : public llama_memory_i { // this callback is used to filter out layers that should not be included in the cache using layer_filter_cb = std::function; - using ubatch_heads = std::vector; - struct defrag_info { bool empty() const { return ids.empty(); @@ -37,6 +35,63 @@ class llama_kv_cache_unified : public llama_memory_i { std::vector ids; }; + struct stream_copy_info { + bool empty() const { + assert(ssrc.size() == sdst.size()); + return ssrc.empty(); + } + + std::vector ssrc; + std::vector sdst; + }; + + // for each ubatch, create a slot_info that contains information about where the ubatch should be inserted in the + // KV cells. for example, cell indices for each token, such that: token[i] -> goes to cells[idxs[i]] + struct slot_info { + // data for ggml_set_rows + using idx_vec_t = std::vector; + + // number of streams: ns = s1 - s0 + 1 + llama_seq_id s0; + llama_seq_id s1; + + std::vector strm; // [ns] + std::vector idxs; // [ns] + + uint32_t head() const { + GGML_ASSERT(idxs.size() == 1); + GGML_ASSERT(!idxs[0].empty()); + + return idxs[0][0]; + } + + void resize(size_t n) { + strm.resize(n); + idxs.resize(n); + } + + size_t size() const { + GGML_ASSERT(idxs.size() == strm.size()); + GGML_ASSERT(!idxs.empty()); + + return idxs[0].size(); + } + + size_t n_stream() const { + return strm.size(); + } + + bool empty() const { + return idxs.empty(); + } + + void clear() { + idxs.clear(); + } + }; + + using slot_info_vec_t = std::vector; + llama_kv_cache_unified( const llama_model & model, layer_filter_cb && filter, @@ -44,6 +99,7 @@ class llama_kv_cache_unified : public llama_memory_i { ggml_type type_v, bool v_trans, bool offload, + bool unified, uint32_t kv_size, uint32_t n_seq_max, uint32_t n_pad, @@ -56,14 +112,14 @@ class llama_kv_cache_unified : public llama_memory_i { 
// llama_memory_i // - llama_memory_state_ptr init_batch( - const llama_batch & batch, + llama_memory_context_ptr init_batch( + llama_batch_allocr & balloc, uint32_t n_ubatch, - bool embd_pooled) override; + bool embd_all) override; - llama_memory_state_ptr init_full() override; + llama_memory_context_ptr init_full() override; - llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) override; + llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) override; bool get_can_shift() const override; @@ -87,7 +143,8 @@ class llama_kv_cache_unified : public llama_memory_i { // llama_kv_cache_unified specific API // - uint32_t get_size() const; + uint32_t get_size() const; + uint32_t get_n_stream() const; bool get_has_shift() const; @@ -97,37 +154,48 @@ class llama_kv_cache_unified : public llama_memory_i { uint32_t get_n_kv() const; + // TODO: temporary + bool get_supports_set_rows() const; + // get views of the current state of the cache - ggml_tensor * get_k(ggml_context * ctx, int32_t il, uint32_t n_kv) const; - ggml_tensor * get_v(ggml_context * ctx, int32_t il, uint32_t n_kv) const; + ggml_tensor * get_k(ggml_context * ctx, int32_t il, uint32_t n_kv, const slot_info & sinfo) const; + ggml_tensor * get_v(ggml_context * ctx, int32_t il, uint32_t n_kv, const slot_info & sinfo) const; // store k_cur and v_cur in the cache based on the provided head location - ggml_tensor * cpy_k(ggml_context * ctx, ggml_tensor * k_cur, int32_t il, uint32_t head_cur) const; - ggml_tensor * cpy_v(ggml_context * ctx, ggml_tensor * v_cur, int32_t il, uint32_t head_cur) const; + ggml_tensor * cpy_k(ggml_context * ctx, ggml_tensor * k_cur, ggml_tensor * k_idxs, int32_t il, const slot_info & sinfo) const; + ggml_tensor * cpy_v(ggml_context * ctx, ggml_tensor * v_cur, ggml_tensor * v_idxs, int32_t il, const slot_info & sinfo) const; // // preparation API // - // find places for the provided ubatches in the cache, returns the head locations + // find places for 
the provided ubatches in the cache, returns the slot infos // return empty vector on failure - ubatch_heads prepare(const std::vector & ubatches); + slot_info_vec_t prepare(const std::vector & ubatches); - bool update(llama_context * lctx, bool do_shift, const defrag_info & dinfo); + bool update(llama_context * lctx, bool do_shift, const defrag_info & dinfo, const stream_copy_info & sc_info); - // return the cell position where we can insert the ubatch - // return -1 on failure to find a contiguous slot of kv cells - int32_t find_slot(const llama_ubatch & ubatch) const; + // find a slot of kv cells that can hold the ubatch + // if cont == true, then the slot must be continuous + // return empty slot_info on failure + slot_info find_slot(const llama_ubatch & ubatch, bool cont) const; - // emplace the ubatch context into slot: [head_cur, head_cur + ubatch.n_tokens) - void apply_ubatch(uint32_t head_cur, const llama_ubatch & ubatch); + // emplace the ubatch context into slot: [sinfo.idxs[0...ubatch.n_tokens - 1]] + void apply_ubatch(const slot_info & sinfo, const llama_ubatch & ubatch); // - // set_input API + // input API // + ggml_tensor * build_input_k_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const; + ggml_tensor * build_input_v_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const; + + void set_input_k_idxs(ggml_tensor * dst, const llama_ubatch * ubatch, const slot_info & sinfo) const; + void set_input_v_idxs(ggml_tensor * dst, const llama_ubatch * ubatch, const slot_info & sinfo) const; + + void set_input_k_shift(ggml_tensor * dst) const; + void set_input_kq_mask (ggml_tensor * dst, const llama_ubatch * ubatch, bool causal_attn) const; - void set_input_k_shift (ggml_tensor * dst) const; void set_input_pos_bucket(ggml_tensor * dst, const llama_ubatch * ubatch) const; private: @@ -141,15 +209,15 @@ class llama_kv_cache_unified : public llama_memory_i { ggml_tensor * k; ggml_tensor * v; + + std::vector k_stream; + std::vector v_stream; }; bool 
v_trans = true; // the value tensor is transposed - // the current index from where we start searching for a free slot in the ring buffer of KV cells (see find_slot()) - // note: this is not part of the KV state and it's only used to speed-up the find_slot() method - uint32_t head = 0; - const uint32_t n_seq_max = 1; + const uint32_t n_stream = 1; // required padding const uint32_t n_pad = 1; @@ -157,14 +225,29 @@ class llama_kv_cache_unified : public llama_memory_i { // SWA const uint32_t n_swa = 0; + // env: LLAMA_KV_CACHE_DEBUG int debug = 0; + // env: LLAMA_SET_ROWS (temporary) + // ref: https://github.com/ggml-org/llama.cpp/pull/14285 + bool supports_set_rows = false; + const llama_swa_type swa_type = LLAMA_SWA_TYPE_NONE; std::vector ctxs; std::vector bufs; - llama_kv_cells_unified cells; + // the current index from where we start searching for a free slot in the ring buffer of KV cells (see find_slot()) + // note: this is not part of the KV state and it's only used to speed-up the find_slot() method + std::vector v_heads; + + std::vector v_cells; + + // maps from a sequence id to a stream id + std::vector seq_to_stream; + + // pending stream copies that will be applied during the next update + stream_copy_info sc_info; std::vector layers; @@ -190,81 +273,92 @@ class llama_kv_cache_unified : public llama_memory_i { float freq_base, float freq_scale) const; - llm_graph_result_ptr build_graph_shift( - const llama_cparams & cparams, - ggml_context * ctx, - ggml_cgraph * gf) const; + ggml_cgraph * build_graph_shift( + llm_graph_result * res, + llama_context * lctx) const; - llm_graph_result_ptr build_graph_defrag( - const llama_cparams & cparams, - ggml_context * ctx, - ggml_cgraph * gf, + ggml_cgraph * build_graph_defrag( + llm_graph_result * res, + llama_context * lctx, const defrag_info & dinfo) const; - void state_write_meta(llama_io_write_i & io, const std::vector> & cell_ranges, llama_seq_id seq_id = -1) const; - void state_write_data(llama_io_write_i & io, 
const std::vector> & cell_ranges) const; + struct cell_ranges_t { + uint32_t strm; - bool state_read_meta(llama_io_read_i & io, uint32_t cell_count, llama_seq_id dest_seq_id = -1); - bool state_read_data(llama_io_read_i & io, uint32_t cell_count); + std::vector> data; // ranges, from inclusive, to exclusive + }; + + void state_write_meta(llama_io_write_i & io, const cell_ranges_t & cr, llama_seq_id seq_id = -1) const; + void state_write_data(llama_io_write_i & io, const cell_ranges_t & cr) const; + + bool state_read_meta(llama_io_read_i & io, uint32_t strm, uint32_t cell_count, llama_seq_id dest_seq_id = -1); + bool state_read_data(llama_io_read_i & io, uint32_t strm, uint32_t cell_count); }; -class llama_kv_cache_unified_state : public llama_memory_state_i { +class llama_kv_cache_unified_context : public llama_memory_context_i { public: // some shorthands - using ubatch_heads = llama_kv_cache_unified::ubatch_heads; - using defrag_info = llama_kv_cache_unified::defrag_info; + using slot_info_vec_t = llama_kv_cache_unified::slot_info_vec_t; + using defrag_info = llama_kv_cache_unified::defrag_info; + using stream_copy_info = llama_kv_cache_unified::stream_copy_info; // used for errors - llama_kv_cache_unified_state(llama_memory_status status); + llama_kv_cache_unified_context(llama_memory_status status); - // used to create a full-cache state - llama_kv_cache_unified_state( + // used to create a full-cache context + llama_kv_cache_unified_context( llama_kv_cache_unified * kv); - // used to create an update state - llama_kv_cache_unified_state( + // used to create an update context + llama_kv_cache_unified_context( llama_kv_cache_unified * kv, llama_context * lctx, bool do_shift, - defrag_info dinfo); + defrag_info dinfo, + stream_copy_info sc_info); - // used to create a decode state from a batch - llama_kv_cache_unified_state( + // used to create a batch procesing context from a batch + llama_kv_cache_unified_context( llama_kv_cache_unified * kv, - llama_sbatch 
sbatch, - ubatch_heads heads, + slot_info_vec_t sinfos, std::vector ubatches); - virtual ~llama_kv_cache_unified_state(); + virtual ~llama_kv_cache_unified_context(); // - // llama_memory_state_i + // llama_memory_context_i // bool next() override; bool apply() override; - std::vector & out_ids() override; - llama_memory_status get_status() const override; const llama_ubatch & get_ubatch() const override; // - // llama_kv_cache_unified_state specific API + // llama_kv_cache_unified_context specific API // uint32_t get_n_kv() const; + // TODO: temporary + bool get_supports_set_rows() const; + // get views of the current state of the cache ggml_tensor * get_k(ggml_context * ctx, int32_t il) const; ggml_tensor * get_v(ggml_context * ctx, int32_t il) const; // store k_cur and v_cur in the cache based on the provided head location - ggml_tensor * cpy_k(ggml_context * ctx, ggml_tensor * k_cur, int32_t il) const; - ggml_tensor * cpy_v(ggml_context * ctx, ggml_tensor * v_cur, int32_t il) const; + ggml_tensor * cpy_k(ggml_context * ctx, ggml_tensor * k_cur, ggml_tensor * k_idxs, int32_t il) const; + ggml_tensor * cpy_v(ggml_context * ctx, ggml_tensor * v_cur, ggml_tensor * v_idxs, int32_t il) const; - void set_input_k_shift(ggml_tensor * dst) const; + ggml_tensor * build_input_k_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const; + ggml_tensor * build_input_v_idxs(ggml_context * ctx, const llama_ubatch & ubatch) const; + void set_input_k_idxs(ggml_tensor * dst, const llama_ubatch * ubatch) const; + void set_input_v_idxs(ggml_tensor * dst, const llama_ubatch * ubatch) const; + + void set_input_k_shift (ggml_tensor * dst) const; void set_input_kq_mask (ggml_tensor * dst, const llama_ubatch * ubatch, bool causal_attn) const; void set_input_pos_bucket(ggml_tensor * dst, const llama_ubatch * ubatch) const; @@ -275,23 +369,23 @@ class llama_kv_cache_unified_state : public llama_memory_state_i { llama_context * lctx; // - // update state + // update context // bool 
do_shift = false; defrag_info dinfo; + stream_copy_info sc_info; + // - // batch processing state + // batch processing context // - llama_sbatch sbatch; - - // the index of the next ubatch to process - size_t i_next = 0; + // the index of the cur ubatch to process + size_t i_cur = 0; - ubatch_heads heads; + slot_info_vec_t sinfos; std::vector ubatches; @@ -302,7 +396,4 @@ class llama_kv_cache_unified_state : public llama_memory_state_i { // a heuristic, to avoid attending the full cache if it is not yet utilized // as the cache gets filled, the benefit from this heuristic disappears int32_t n_kv; - - // the beginning of the current slot in which the ubatch will be inserted - int32_t head; }; diff --git a/src/llama-kv-cells.h b/src/llama-kv-cells.h index acf30aebec69b..0d0dd316fd041 100644 --- a/src/llama-kv-cells.h +++ b/src/llama-kv-cells.h @@ -7,6 +7,7 @@ #include #include #include +#include // meta information about KV cells that can be part of multiple sequences at the same time // TODO: add unit tests @@ -23,7 +24,7 @@ class llama_kv_cells_unified { used.clear(); - for (uint32_t s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + for (uint32_t s = 0; s < LLAMA_MAX_SEQ; ++s) { seq_pos[s].clear(); } } @@ -104,10 +105,30 @@ class llama_kv_cells_unified { res.resize(n); for (uint32_t j = 0; j < n; ++j) { - res.pos[j] = pos[i + j]; - res.seq[j] = seq[i + j]; + const auto idx = i + j; - assert(shift[i + j] == 0); + res.pos[j] = pos[idx]; + res.seq[j] = seq[idx]; + + assert(shift[idx] == 0); + } + + return res; + } + + // copy the state of cells [idxs[0], idxs[1], ..., idxs[idxs.size() - 1]) + llama_kv_cells_unified cp(const std::vector & idxs) const { + llama_kv_cells_unified res; + + res.resize(idxs.size()); + + for (uint32_t j = 0; j < idxs.size(); ++j) { + const auto idx = idxs[j]; + + res.pos[j] = pos[idx]; + res.seq[j] = seq[idx]; + + assert(shift[idx] == 0); } return res; @@ -118,26 +139,58 @@ class llama_kv_cells_unified { assert(i + other.pos.size() <= 
pos.size()); for (uint32_t j = 0; j < other.pos.size(); ++j) { - if (pos[i + j] == -1 && other.pos[j] != -1) { + const auto idx = i + j; + + if (pos[idx] == -1 && other.pos[j] != -1) { used.insert(i + j); } - if (pos[i + j] != -1 && other.pos[j] == -1) { + if (pos[idx] != -1 && other.pos[j] == -1) { used.erase(i + j); } - if (pos[i + j] != -1) { + if (pos[idx] != -1) { seq_pos_rm(i + j); } - pos[i + j] = other.pos[j]; - seq[i + j] = other.seq[j]; + pos[idx] = other.pos[j]; + seq[idx] = other.seq[j]; - if (pos[i + j] != -1) { + if (pos[idx] != -1) { seq_pos_add(i + j); } - assert(shift[i + j] == 0); + assert(shift[idx] == 0); + } + } + + // set the state of cells [idxs[0], idxs[1], ..., idxs[idxs.size() - 1]) + void set(const std::vector & idxs, const llama_kv_cells_unified & other) { + assert(idxs.size() == other.pos.size()); + + for (uint32_t j = 0; j < other.pos.size(); ++j) { + const auto idx = idxs[j]; + + if (pos[idx] == -1 && other.pos[j] != -1) { + used.insert(idx); + } + + if (pos[idx] != -1 && other.pos[j] == -1) { + used.erase(idx); + } + + if (pos[idx] != -1) { + seq_pos_rm(idx); + } + + pos[idx] = other.pos[j]; + seq[idx] = other.seq[j]; + + if (pos[idx] != -1) { + seq_pos_add(idx); + } + + assert(shift[idx] == 0); } } @@ -164,7 +217,7 @@ class llama_kv_cells_unified { assert(seq_id >= 0); seq[i].reset(seq_id); - seq_pos[seq_id].erase(pos[i]); + seq_pos_dec(seq_id, pos[i]); if (seq[i].none()) { pos[i] = -1; @@ -187,7 +240,7 @@ class llama_kv_cells_unified { seq[i].reset(); seq[i].set(seq_id); - seq_pos[seq_id].insert(pos[i]); + seq_pos_inc(seq_id, pos[i]); return false; } @@ -232,7 +285,7 @@ class llama_kv_cells_unified { assert(!seq[i].test(seq_id)); seq[i].set(seq_id); - seq_pos[seq_id].insert(pos[i]); + seq_pos_inc(seq_id, pos[i]); } // return the sequence id of this cell @@ -240,7 +293,7 @@ class llama_kv_cells_unified { llama_seq_id seq_get(uint32_t i) const { assert(seq[i].count() == 1); - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + 
for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { if (seq[i].test(s)) { return s; } @@ -253,26 +306,30 @@ class llama_kv_cells_unified { // return -1 if the sequence is not present llama_pos seq_pos_min(llama_seq_id seq_id) const { assert(seq_id >= 0); - assert(seq_id < LLAMA_MAX_PARALLEL_SEQUENCES); + assert(seq_id < LLAMA_MAX_SEQ); if (seq_pos[seq_id].empty()) { return -1; } - return *seq_pos[seq_id].begin(); + assert(seq_pos[seq_id].begin()->second > 0); + + return seq_pos[seq_id].begin()->first; } // the maximum position of sequence seq_id currently present in any of the cells // return -1 if the sequence is not present llama_pos seq_pos_max(llama_seq_id seq_id) const { assert(seq_id >= 0); - assert(seq_id < LLAMA_MAX_PARALLEL_SEQUENCES); + assert(seq_id < LLAMA_MAX_SEQ); if (seq_pos[seq_id].empty()) { return -1; } - return *seq_pos[seq_id].rbegin(); + assert(seq_pos[seq_id].rbegin()->second > 0); + + return seq_pos[seq_id].rbegin()->first; } // note: call only if the cell is not empty @@ -384,31 +441,50 @@ class llama_kv_cells_unified { // std::vector shift; - using bits_t = std::bitset; + using seq_set_t = std::bitset; // the bitset seq[i] tells us which sequences are currently occupying the i-th cell - std::vector seq; + std::vector seq; - // the set seq_pos[s] tells us which positions are currently present for sequence s + // the set seq_pos[s][p] tells us how many times the position p is currently present for sequence s + // if the position p is not present, seq_pos[s][p] is not set // this way seq_pos[s].begin() and seq_pos[s].rbegin() give us the min/max positions currently in the cache - std::set seq_pos[LLAMA_MAX_PARALLEL_SEQUENCES]; + // + // note that we cannot a use an std::set because in some cases a position can occur more than once for the same seq: + // - during performing a cache reuse via (rm + add) + // - some vision models have input embeddings with repeating positions + // + std::map seq_pos[LLAMA_MAX_SEQ]; // helper functions for updating 
`seq_pos`, once cell at a time: + void seq_pos_dec(llama_seq_id s, llama_pos p) { + auto it = seq_pos[s].find(p); + assert(it != seq_pos[s].end()); + + if (--it->second == 0) { + seq_pos[s].erase(it); + } + } + + void seq_pos_inc(llama_seq_id s, llama_pos p) { + seq_pos[s][p]++; + } + // remove cell i void seq_pos_rm(uint32_t i) { - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { if (seq[i].test(s)) { - seq_pos[s].erase(pos[i]); + seq_pos_dec(s, pos[i]); } } } // add cell i void seq_pos_add(uint32_t i) { - for (int s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) { + for (int s = 0; s < LLAMA_MAX_SEQ; ++s) { if (seq[i].test(s)) { - seq_pos[s].insert(pos[i]); + seq_pos_inc(s, pos[i]); } } } diff --git a/src/llama-memory-hybrid.cpp b/src/llama-memory-hybrid.cpp new file mode 100644 index 0000000000000..d8e2086c87514 --- /dev/null +++ b/src/llama-memory-hybrid.cpp @@ -0,0 +1,252 @@ +#include "llama-memory-hybrid.h" + +#include "llama-impl.h" +#include "llama-model.h" +#include "llama-context.h" + +// +// llama_memory_hybrid +// + +llama_memory_hybrid::llama_memory_hybrid( + const llama_model & model, + /* attn */ + ggml_type type_k, + ggml_type type_v, + bool v_trans, + uint32_t kv_size, + uint32_t n_pad, + uint32_t n_swa, + llama_swa_type swa_type, + /* recurrent */ + ggml_type type_r, + ggml_type type_s, + uint32_t rs_size, + /* common */ + uint32_t n_seq_max, + bool offload, + /* layer filters */ + layer_filter_cb && filter_attn, + layer_filter_cb && filter_recr) : + hparams(model.hparams), + mem_attn(new llama_kv_cache_unified( + model, + filter_attn == nullptr ? + [&](int32_t il) { return !hparams.is_recurrent(il); } + : filter_attn, + type_k, + type_v, + v_trans, + offload, + 1, + kv_size, + n_seq_max, + n_pad, + n_swa, + swa_type + )), + mem_recr(new llama_memory_recurrent( + model, + filter_recr == nullptr ? 
+ [&](int32_t il) { return hparams.is_recurrent(il); } + : filter_recr, + type_r, + type_s, + offload, + rs_size, + n_seq_max + )) {} + +llama_memory_context_ptr llama_memory_hybrid::init_batch(llama_batch_allocr & balloc, uint32_t n_ubatch, bool embd_all) { + do { + balloc.split_reset(); + + // follow the recurrent pattern for creating the ubatch splits + std::vector ubatches; + + while (true) { + llama_ubatch ubatch; + + if (embd_all) { + // if all tokens are output, split by sequence + ubatch = balloc.split_seq(n_ubatch); + } else { + ubatch = balloc.split_equal(n_ubatch, false); + } + + if (ubatch.n_tokens == 0) { + break; + } + + ubatches.push_back(std::move(ubatch)); // NOLINT + } + + if (balloc.get_n_used() < balloc.get_n_tokens()) { + // failed to find a suitable split + break; + } + + // prepare the recurrent batches first + if (!mem_recr->prepare(ubatches)) { + // TODO: will the recurrent cache be in an undefined context at this point? + LLAMA_LOG_ERROR("%s: failed to prepare recurrent ubatches\n", __func__); + return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); + } + + // prepare the attention cache + auto heads_attn = mem_attn->prepare(ubatches); + if (heads_attn.empty()) { + LLAMA_LOG_ERROR("%s: failed to prepare attention ubatches\n", __func__); + return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); + } + + return std::make_unique( + this, std::move(heads_attn), std::move(ubatches)); + } while(false); + + return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); +} + +llama_memory_context_ptr llama_memory_hybrid::init_full() { + return std::make_unique(this); +} + +llama_memory_context_ptr llama_memory_hybrid::init_update(llama_context * lctx, bool optimize) { + return std::make_unique(this, lctx, optimize); +} + +bool llama_memory_hybrid::get_can_shift() const { + // Shifting is trivially supported for recurrent + return mem_attn->get_can_shift(); +} + +void llama_memory_hybrid::clear(bool data) { + mem_attn->clear(data); + 
mem_recr->clear(data); +} + +bool llama_memory_hybrid::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) { + // Try removing from the recurrent cache first since it may fail. If it does + // fail, the cache will not have been mutated. + if (!mem_recr->seq_rm(seq_id, p0, p1)) { + return false; + } + return mem_attn->seq_rm(seq_id, p0, p1); +} + +void llama_memory_hybrid::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) { + mem_attn->seq_cp(seq_id_src, seq_id_dst, p0, p1); + mem_recr->seq_cp(seq_id_src, seq_id_dst, p0, p1); +} + +void llama_memory_hybrid::seq_keep(llama_seq_id seq_id) { + mem_attn->seq_keep(seq_id); + mem_recr->seq_keep(seq_id); +} + +void llama_memory_hybrid::seq_add(llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) { + mem_attn->seq_add(seq_id, p0, p1, shift); + mem_recr->seq_add(seq_id, p0, p1, shift); +} + +void llama_memory_hybrid::seq_div(llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) { + mem_attn->seq_div(seq_id, p0, p1, d); + mem_recr->seq_div(seq_id, p0, p1, d); +} + +llama_pos llama_memory_hybrid::seq_pos_min(llama_seq_id seq_id) const { + // the min of the total cache is the max of the two caches' min values + return std::max(mem_attn->seq_pos_min(seq_id), mem_recr->seq_pos_min(seq_id)); +} + +llama_pos llama_memory_hybrid::seq_pos_max(llama_seq_id seq_id) const { + // the max of the total cache is the min of the two caches' max values + return std::min(mem_attn->seq_pos_max(seq_id), mem_recr->seq_pos_max(seq_id)); +} + +void llama_memory_hybrid::state_write(llama_io_write_i & io, llama_seq_id seq_id) const { + mem_attn->state_write(io, seq_id); + mem_recr->state_write(io, seq_id); +} + +void llama_memory_hybrid::state_read(llama_io_read_i & io, llama_seq_id seq_id) { + mem_attn->state_read(io, seq_id); + mem_recr->state_read(io, seq_id); +} + +llama_kv_cache_unified * llama_memory_hybrid::get_mem_attn() const { + return mem_attn.get(); +} + +llama_memory_recurrent * 
llama_memory_hybrid::get_mem_recr() const { + return mem_recr.get(); +} + +llama_memory_hybrid_context::llama_memory_hybrid_context(llama_memory_status status) : status(status) {} + +llama_memory_hybrid_context::llama_memory_hybrid_context(llama_memory_hybrid * mem) : + ctx_attn(mem->get_mem_attn()->init_full()), + ctx_recr(mem->get_mem_recr()->init_full()), + status(llama_memory_status_combine(ctx_attn->get_status(), ctx_recr->get_status())) { +} + +llama_memory_hybrid_context::llama_memory_hybrid_context( + llama_memory_hybrid * mem, + llama_context * lctx, + bool optimize) : + ctx_attn(mem->get_mem_attn()->init_update(lctx, optimize)), + ctx_recr(mem->get_mem_recr()->init_update(lctx, optimize)), + status(llama_memory_status_combine(ctx_attn->get_status(), ctx_recr->get_status())) { +} + +llama_memory_hybrid_context::llama_memory_hybrid_context( + llama_memory_hybrid * mem, + slot_info_vec_t sinfos_attn, + std::vector ubatches) : + ubatches(std::move(ubatches)), + // note: here we copy the ubatches. 
not sure if this is ideal + ctx_attn(new llama_kv_cache_unified_context(mem->get_mem_attn(), std::move(sinfos_attn), this->ubatches)), + ctx_recr(new llama_memory_recurrent_context(mem->get_mem_recr(), this->ubatches)), + status(llama_memory_status_combine(ctx_attn->get_status(), ctx_recr->get_status())) { +} + +bool llama_memory_hybrid_context::next() { + assert(status == LLAMA_MEMORY_STATUS_SUCCESS); + + ctx_attn->next(); + ctx_recr->next(); + + if (++i_next >= ubatches.size()) { + return false; + } + + return true; +} + +bool llama_memory_hybrid_context::apply() { + assert(!llama_memory_status_is_fail(status)); + + bool res = true; + + res = res & ctx_attn->apply(); + res = res & ctx_recr->apply(); + + return res; +} + +llama_memory_status llama_memory_hybrid_context::get_status() const { + return status; +} + +const llama_ubatch & llama_memory_hybrid_context::get_ubatch() const { + assert(status == LLAMA_MEMORY_STATUS_SUCCESS); + return ubatches[i_next]; +} + +const llama_kv_cache_unified_context * llama_memory_hybrid_context::get_attn() const { + return static_cast(ctx_attn.get()); +} + +const llama_memory_recurrent_context * llama_memory_hybrid_context::get_recr() const { + return static_cast(ctx_recr.get()); +} diff --git a/src/llama-memory-hybrid.h b/src/llama-memory-hybrid.h new file mode 100644 index 0000000000000..4ac318175785e --- /dev/null +++ b/src/llama-memory-hybrid.h @@ -0,0 +1,140 @@ +#pragma once + +#include "llama-batch.h" +#include "llama-graph.h" +#include "llama-kv-cache-unified.h" +#include "llama-memory.h" +#include "llama-memory-recurrent.h" + +#include +#include + +// +// llama_memory_hybrid +// + +// utilizes instances of llama_memory_recurrent and llama_kv_cache_unified to +// support models where each layer may be either attention-based or recurrent + +class llama_memory_hybrid : public llama_memory_i { +public: + + // this callback is used to filter out layers that should not be included in the cache + using layer_filter_cb = 
std::function; + + llama_memory_hybrid( + const llama_model & model, + /* attn */ + ggml_type type_k, + ggml_type type_v, + bool v_trans, + uint32_t kv_size, + uint32_t n_pad, + uint32_t n_swa, + llama_swa_type swa_type, + /* recurrent */ + ggml_type type_r, + ggml_type type_s, + uint32_t rs_size, + /* common */ + uint32_t n_seq_max, + bool offload, + /* layer filters */ + layer_filter_cb && filter_attn = nullptr, + layer_filter_cb && filter_recr = nullptr); + + ~llama_memory_hybrid() = default; + + // + // llama_memory_i + // + + llama_memory_context_ptr init_batch( + llama_batch_allocr & balloc, + uint32_t n_ubatch, + bool embd_all) override; + + llama_memory_context_ptr init_full() override; + + llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) override; + + bool get_can_shift() const override; + + void clear(bool data) override; + + bool seq_rm (llama_seq_id seq_id, llama_pos p0, llama_pos p1) override; + void seq_cp (llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) override; + void seq_keep(llama_seq_id seq_id) override; + void seq_add (llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) override; + void seq_div (llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) override; + + llama_pos seq_pos_min(llama_seq_id seq_id) const override; + llama_pos seq_pos_max(llama_seq_id seq_id) const override; + + // state write/load + + void state_write(llama_io_write_i & io, llama_seq_id seq_id = -1) const override; + void state_read (llama_io_read_i & io, llama_seq_id seq_id = -1) override; + + // + // llama_memory_hybrid specific API + // + + llama_kv_cache_unified * get_mem_attn() const; + llama_memory_recurrent * get_mem_recr() const; + +private: + const llama_hparams & hparams; + + const std::unique_ptr mem_attn; + const std::unique_ptr mem_recr; +}; + +class llama_memory_hybrid_context : public llama_memory_context_i { +public: + using slot_info_vec_t = llama_kv_cache_unified::slot_info_vec_t; + + 
// init failure + explicit llama_memory_hybrid_context(llama_memory_status status); + + // init full + explicit llama_memory_hybrid_context(llama_memory_hybrid * mem); + + // init update + explicit llama_memory_hybrid_context( + llama_memory_hybrid * mem, + llama_context * lctx, + bool optimize); + + // init success + llama_memory_hybrid_context( + llama_memory_hybrid * mem, + slot_info_vec_t sinfos_attn, + std::vector ubatches); + + ~llama_memory_hybrid_context() = default; + + bool next() override; + bool apply() override; + + llama_memory_status get_status() const override; + const llama_ubatch & get_ubatch() const override; + + // + // llama_memory_hybrid_context + // + + const llama_kv_cache_unified_context * get_attn() const; + const llama_memory_recurrent_context * get_recr() const; + +private: + // the index of the next ubatch to process + size_t i_next = 0; + + std::vector ubatches; + + const llama_memory_context_ptr ctx_attn; + const llama_memory_context_ptr ctx_recr; + + const llama_memory_status status; +}; diff --git a/src/llama-kv-cache-recurrent.cpp b/src/llama-memory-recurrent.cpp similarity index 62% rename from src/llama-kv-cache-recurrent.cpp rename to src/llama-memory-recurrent.cpp index de23b4ad23bce..1e1a7a9b31e46 100644 --- a/src/llama-kv-cache-recurrent.cpp +++ b/src/llama-memory-recurrent.cpp @@ -1,4 +1,4 @@ -#include "llama-kv-cache-recurrent.h" +#include "llama-memory-recurrent.h" #include "llama-impl.h" #include "llama-io.h" @@ -12,27 +12,25 @@ #include // -// llama_kv_cache_recurrent +// llama_memory_recurrent // -llama_kv_cache_recurrent::llama_kv_cache_recurrent( - const llama_model & model, - ggml_type type_k, - ggml_type type_v, - bool offload, - uint32_t kv_size, - uint32_t n_seq_max) : hparams(model.hparams), n_seq_max(n_seq_max) { +llama_memory_recurrent::llama_memory_recurrent( + const llama_model & model, + layer_filter_cb && filter, + ggml_type type_r, + ggml_type type_s, + bool offload, + uint32_t mem_size, + uint32_t 
n_seq_max) : hparams(model.hparams), n_seq_max(n_seq_max) { const int32_t n_layer = hparams.n_layer; - LLAMA_LOG_INFO("%s: kv_size = %u, n_seq_max = %u, type_k = '%s', type_v = '%s', n_layer = %d\n", - __func__, kv_size, n_seq_max, ggml_type_name(type_k), ggml_type_name(type_v), n_layer); - head = 0; - size = kv_size; + size = mem_size; used = 0; cells.clear(); - cells.resize(kv_size); + cells.resize(mem_size); // create a context for each buffer type std::map ctx_map; @@ -59,12 +57,14 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent( return it->second; }; - k_l.reserve(n_layer); - v_l.reserve(n_layer); + r_l.resize(n_layer); + s_l.resize(n_layer); for (int i = 0; i < n_layer; i++) { - const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(i) + hparams.n_embd_k_s(); - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(i) + hparams.n_embd_v_s(); + if (filter && !filter(i)) { + LLAMA_LOG_DEBUG("%s: layer %3d: skipped\n", __func__, i); + continue; + } const char * dev_name = "CPU"; @@ -81,15 +81,15 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent( ggml_context * ctx = ctx_for_buft(buft); if (!ctx) { - throw std::runtime_error("failed to create ggml context for kv cache"); + throw std::runtime_error("failed to create ggml context for rs cache"); } - ggml_tensor * k = ggml_new_tensor_1d(ctx, type_k, n_embd_k_gqa*kv_size); - ggml_tensor * v = ggml_new_tensor_1d(ctx, type_v, n_embd_v_gqa*kv_size); - ggml_format_name(k, "cache_k_l%d", i); - ggml_format_name(v, "cache_v_l%d", i); - k_l.push_back(k); - v_l.push_back(v); + ggml_tensor * r = ggml_new_tensor_1d(ctx, type_r, hparams.n_embd_r()*mem_size); + ggml_tensor * s = ggml_new_tensor_1d(ctx, type_s, hparams.n_embd_s()*mem_size); + ggml_format_name(r, "cache_r_l%d", i); + ggml_format_name(s, "cache_s_l%d", i); + r_l[i] = r; + s_l[i] = s; } // allocate tensors and initialize the buffers to avoid NaNs in the padding @@ -99,25 +99,25 @@ llama_kv_cache_recurrent::llama_kv_cache_recurrent( ggml_backend_buffer_t buf = 
ggml_backend_alloc_ctx_tensors_from_buft(ctx, buft); if (!buf) { - throw std::runtime_error("failed to allocate buffer for kv cache"); + throw std::runtime_error("failed to allocate buffer for rs cache"); } ggml_backend_buffer_clear(buf, 0); - LLAMA_LOG_INFO("%s: %10s KV buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0); + LLAMA_LOG_INFO("%s: %10s RS buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf)/1024.0/1024.0); bufs.emplace_back(buf); } { - const size_t memory_size_k = size_k_bytes(); - const size_t memory_size_v = size_v_bytes(); + const size_t memory_size_r = size_r_bytes(); + const size_t memory_size_s = size_s_bytes(); - LLAMA_LOG_INFO("%s: KV self size = %7.2f MiB, K (%s): %7.2f MiB, V (%s): %7.2f MiB\n", __func__, - (float)(memory_size_k + memory_size_v) / (1024.0f * 1024.0f), - ggml_type_name(type_k), (float)memory_size_k / (1024.0f * 1024.0f), - ggml_type_name(type_v), (float)memory_size_v / (1024.0f * 1024.0f)); + LLAMA_LOG_INFO("%s: size = %7.2f MiB (%6u cells, %3d layers, %2u seqs), R (%s): %7.2f MiB, S (%s): %7.2f MiB\n", __func__, + (float)(memory_size_r + memory_size_s) / (1024.0f * 1024.0f), mem_size, n_layer, n_seq_max, + ggml_type_name(type_r), (float)memory_size_r / (1024.0f * 1024.0f), + ggml_type_name(type_s), (float)memory_size_s / (1024.0f * 1024.0f)); } } -void llama_kv_cache_recurrent::clear(bool data) { +void llama_memory_recurrent::clear(bool data) { for (int32_t i = 0; i < (int32_t) size; ++i) { cells[i].pos = -1; cells[i].seq_id.clear(); @@ -135,7 +135,7 @@ void llama_kv_cache_recurrent::clear(bool data) { } } -bool llama_kv_cache_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) { +bool llama_memory_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) { uint32_t new_head = size; if (p0 < 0) { @@ -154,7 +154,7 @@ bool llama_kv_cache_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, 
llama_p if (0 <= seq_id) { int32_t & tail_id = cells[seq_id].tail; if (tail_id >= 0) { - const kv_cell & cell = cells[tail_id]; + const auto & cell = cells[tail_id]; // partial intersection is invalid if ((0 < p0 && p0 <= cell.pos) || (0 < p1 && p1 <= cell.pos)) { return false; @@ -202,7 +202,7 @@ bool llama_kv_cache_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_p return true; } -void llama_kv_cache_recurrent::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) { +void llama_memory_recurrent::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_id_dst, llama_pos p0, llama_pos p1) { if (seq_id_src == seq_id_dst) { return; } @@ -216,11 +216,11 @@ void llama_kv_cache_recurrent::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_ } if ((uint32_t) seq_id_dst < size && (uint32_t) seq_id_src < size) { - kv_cell & tail_src = cells[seq_id_src]; - kv_cell & tail_dst = cells[seq_id_dst]; + auto & tail_src = cells[seq_id_src]; + auto & tail_dst = cells[seq_id_dst]; if (tail_dst.tail >= 0) { // clear destination seq_id if it wasn't empty - kv_cell & cell_dst = cells[tail_dst.tail]; + auto & cell_dst = cells[tail_dst.tail]; cell_dst.seq_id.erase(seq_id_dst); tail_dst.tail = -1; @@ -231,7 +231,7 @@ void llama_kv_cache_recurrent::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_ } } if (tail_src.tail >= 0) { - kv_cell & cell_src = cells[tail_src.tail]; + auto & cell_src = cells[tail_src.tail]; cell_src.seq_id.insert(seq_id_dst); tail_dst.tail = tail_src.tail; @@ -239,7 +239,7 @@ void llama_kv_cache_recurrent::seq_cp(llama_seq_id seq_id_src, llama_seq_id seq_ } } -void llama_kv_cache_recurrent::seq_keep(llama_seq_id seq_id) { +void llama_memory_recurrent::seq_keep(llama_seq_id seq_id) { uint32_t new_head = size; for (uint32_t i = 0; i < size; ++i) { @@ -271,7 +271,7 @@ void llama_kv_cache_recurrent::seq_keep(llama_seq_id seq_id) { } } -void llama_kv_cache_recurrent::seq_add(llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) { 
+void llama_memory_recurrent::seq_add(llama_seq_id seq_id, llama_pos p0, llama_pos p1, llama_pos shift) { if (shift == 0) { return; } @@ -293,7 +293,7 @@ void llama_kv_cache_recurrent::seq_add(llama_seq_id seq_id, llama_pos p0, llama_ if (0 <= seq_id && seq_id < (int64_t) size) { const int32_t tail_id = cells[seq_id].tail; if (tail_id >= 0) { - kv_cell & cell = cells[tail_id]; + auto & cell = cells[tail_id]; if (cell.has_seq_id(seq_id) && p0 <= cell.pos && cell.pos < p1) { cell.pos += shift; } @@ -301,7 +301,7 @@ void llama_kv_cache_recurrent::seq_add(llama_seq_id seq_id, llama_pos p0, llama_ } } -void llama_kv_cache_recurrent::seq_div(llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) { +void llama_memory_recurrent::seq_div(llama_seq_id seq_id, llama_pos p0, llama_pos p1, int d) { if (d == 1) { return; } @@ -323,7 +323,7 @@ void llama_kv_cache_recurrent::seq_div(llama_seq_id seq_id, llama_pos p0, llama_ if (0 <= seq_id && seq_id < (int64_t) size) { const int32_t tail_id = cells[seq_id].tail; if (tail_id >= 0) { - kv_cell & cell = cells[tail_id]; + auto & cell = cells[tail_id]; if (cell.has_seq_id(seq_id) && p0 <= cell.pos && cell.pos < p1) { cell.pos /= d; } @@ -331,7 +331,7 @@ void llama_kv_cache_recurrent::seq_div(llama_seq_id seq_id, llama_pos p0, llama_ } } -llama_pos llama_kv_cache_recurrent::seq_pos_min(llama_seq_id seq_id) const { +llama_pos llama_memory_recurrent::seq_pos_min(llama_seq_id seq_id) const { llama_pos result = std::numeric_limits::max(); for (uint32_t i = 0; i < size; ++i) { @@ -347,7 +347,7 @@ llama_pos llama_kv_cache_recurrent::seq_pos_min(llama_seq_id seq_id) const { return result; } -llama_pos llama_kv_cache_recurrent::seq_pos_max(llama_seq_id seq_id) const { +llama_pos llama_memory_recurrent::seq_pos_max(llama_seq_id seq_id) const { llama_pos result = -1; for (uint32_t i = 0; i < size; ++i) { @@ -359,45 +359,55 @@ llama_pos llama_kv_cache_recurrent::seq_pos_max(llama_seq_id seq_id) const { return result; } -llama_memory_state_ptr 
llama_kv_cache_recurrent::init_batch(const llama_batch & batch, uint32_t n_ubatch, bool embd_pooled) { - GGML_UNUSED(embd_pooled); +llama_memory_context_ptr llama_memory_recurrent::init_batch(llama_batch_allocr & balloc, uint32_t n_ubatch, bool embd_all) { + do { + balloc.split_reset(); - auto sbatch = llama_sbatch(batch, hparams.n_embd, false); + std::vector ubatches; + while (true) { + llama_ubatch ubatch; - std::vector ubatches; + if (embd_all) { + // if all tokens are output, split by sequence + ubatch = balloc.split_seq(n_ubatch); + } else { + ubatch = balloc.split_equal(n_ubatch, false); + } - while (sbatch.n_tokens > 0) { - llama_ubatch ubatch; + if (ubatch.n_tokens == 0) { + break; + } - if (embd_pooled) { - // Pooled embeddings cannot be split across ubatches (yet) - ubatch = sbatch.split_seq(n_ubatch); - } else { - ubatch = sbatch.split_equal(n_ubatch); + ubatches.push_back(std::move(ubatch)); // NOLINT } - ubatches.push_back(ubatch); - } + if (balloc.get_n_used() < balloc.get_n_tokens()) { + // failed to find a suitable split + break; + } - if (!prepare(ubatches)) { - return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); - } + if (!prepare(ubatches)) { + break; + } - return std::make_unique(LLAMA_MEMORY_STATUS_SUCCESS, this, std::move(sbatch), std::move(ubatches)); + return std::make_unique(this, std::move(ubatches)); + } while (false); + + return std::make_unique(LLAMA_MEMORY_STATUS_FAILED_PREPARE); } -llama_memory_state_ptr llama_kv_cache_recurrent::init_full() { - return std::make_unique(LLAMA_MEMORY_STATUS_SUCCESS, this); +llama_memory_context_ptr llama_memory_recurrent::init_full() { + return std::make_unique(this); } -llama_memory_state_ptr llama_kv_cache_recurrent::init_update(llama_context * lctx, bool optimize) { +llama_memory_context_ptr llama_memory_recurrent::init_update(llama_context * lctx, bool optimize) { GGML_UNUSED(lctx); GGML_UNUSED(optimize); - return std::make_unique(LLAMA_MEMORY_STATUS_NO_UPDATE); + return 
std::make_unique(LLAMA_MEMORY_STATUS_NO_UPDATE); } -bool llama_kv_cache_recurrent::prepare(const std::vector & ubatches) { +bool llama_memory_recurrent::prepare(const std::vector & ubatches) { // simply remember the full state because it is very small for this type of cache // TODO: optimize auto org_cells = cells; @@ -421,10 +431,9 @@ bool llama_kv_cache_recurrent::prepare(const std::vector & ubatche return success; } -bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { - const uint32_t n_seqs = ubatch.n_seqs; - +bool llama_memory_recurrent::find_slot(const llama_ubatch & ubatch) { const uint32_t n_seq_tokens = ubatch.n_seq_tokens; + const uint32_t n_seqs = ubatch.n_seqs; // if we have enough unused cells before the current head -> // better to start searching from the beginning of the cache, hoping to fill it @@ -437,16 +446,18 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { // A slot should be always be contiguous. // can only process batches with an equal number of new tokens in each sequence - GGML_ASSERT(ubatch.equal_seqs); + GGML_ASSERT(ubatch.equal_seqs()); int32_t min = size - 1; int32_t max = 0; // everything should fit if all seq_ids are smaller than the max for (uint32_t s = 0; s < n_seqs; ++s) { - const uint32_t n_seq_id = ubatch.n_seq_id[s]; + const uint32_t i = s*n_seq_tokens; // first token of sequence set s + const uint32_t n_seq_id = ubatch.n_seq_id[i]; + for (uint32_t j = 0; j < n_seq_id; ++j) { - const llama_seq_id seq_id = ubatch.seq_id[s][j]; + const llama_seq_id seq_id = ubatch.seq_id[i][j]; if (seq_id < 0 || (uint32_t) seq_id >= size) { // too big seq_id @@ -455,9 +466,9 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { return false; } if (j > 0) { - kv_cell & seq = cells[seq_id]; + auto & seq = cells[seq_id]; if (seq.tail >= 0) { - kv_cell & cell = cells[seq.tail]; + auto & cell = cells[seq.tail]; // clear cells from seq_ids that become shared // (should not normally 
happen, but let's handle it anyway) cell.seq_id.erase(seq_id); @@ -477,7 +488,7 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { std::vector tails_verif; tails_verif.assign(size, -1); for (uint32_t i = 0; i < size; ++i) { - kv_cell & cell = cells[i]; + auto & cell = cells[i]; for (llama_seq_id seq_id : cell.seq_id) { if (tails_verif[seq_id] != -1) { LLAMA_LOG_ERROR("%s: duplicate tail for seq_id %d in cell %d and %d\n", __func__, seq_id, i, tails_verif[seq_id]); @@ -498,28 +509,29 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { for (uint32_t i = 0; i < size; ++i) { if (next_empty_cell >= size) { next_empty_cell -= size; } - kv_cell & cell = cells[next_empty_cell]; + auto & cell = cells[next_empty_cell]; if (cell.is_empty()) { break; } next_empty_cell += 1; } // find usable cell range for (uint32_t s = 0; s < n_seqs; ++s) { - const llama_seq_id seq_id = ubatch.seq_id[s][0]; - kv_cell & seq_meta = cells[seq_id]; + const uint32_t i = s*n_seq_tokens; + const llama_seq_id seq_id = ubatch.seq_id[i][0]; + auto & seq_meta = cells[seq_id]; bool has_cell = false; if (seq_meta.tail >= 0) { - kv_cell & cell = cells[seq_meta.tail]; + auto & cell = cells[seq_meta.tail]; GGML_ASSERT(cell.has_seq_id(seq_id)); // does this seq_id "own" the cell? 
if (cell.seq_id.size() == 1) { has_cell = true; } } if (!has_cell) { - kv_cell & empty_cell = cells[next_empty_cell]; + auto & empty_cell = cells[next_empty_cell]; GGML_ASSERT(empty_cell.is_empty()); // copy old tail into the empty cell if (seq_meta.tail >= 0) { - kv_cell & orig_cell = cells[seq_meta.tail]; + auto & orig_cell = cells[seq_meta.tail]; empty_cell.pos = orig_cell.pos; empty_cell.src = orig_cell.src; orig_cell.seq_id.erase(seq_id); @@ -529,10 +541,10 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { seq_meta.tail = next_empty_cell; // find next empty cell if (s + 1 < n_seqs) { - for (uint32_t i = 0; i < size; ++i) { + for (uint32_t j = 0; j < size; ++j) { next_empty_cell += 1; if (next_empty_cell >= size) { next_empty_cell -= size; } - kv_cell & cell = cells[next_empty_cell]; + auto & cell = cells[next_empty_cell]; if (cell.is_empty()) { break; } } } @@ -543,19 +555,20 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { // gather and re-order for (uint32_t s = 0; s < n_seqs; ++s) { + const uint32_t i = s*n_seq_tokens; const int32_t dst_id = s + min; - const int32_t src_id = cells[ubatch.seq_id[s][0]].tail; + const int32_t src_id = cells[ubatch.seq_id[i][0]].tail; if (dst_id != src_id) { - kv_cell & dst_cell = cells[dst_id]; - kv_cell & src_cell = cells[src_id]; + auto & dst_cell = cells[dst_id]; + auto & src_cell = cells[src_id]; std::swap(dst_cell.pos, src_cell.pos); std::swap(dst_cell.src, src_cell.src); std::swap(dst_cell.seq_id, src_cell.seq_id); // swap tails - for (uint32_t i = 0; i < size; ++i) { - int32_t & tail = cells[i].tail; + for (uint32_t j = 0; j < size; ++j) { + int32_t & tail = cells[j].tail; if (tail == src_id) { tail = dst_id; } else if (tail == dst_id) { @@ -567,20 +580,21 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { // update the pos of the used seqs for (uint32_t s = 0; s < n_seqs; ++s) { - const llama_pos last_pos = ubatch.pos[n_seq_tokens * s + 
n_seq_tokens - 1]; + const uint32_t i = s*n_seq_tokens; + const llama_pos last_pos = ubatch.pos[i + n_seq_tokens - 1]; const int32_t cell_id = s + min; - kv_cell & cell = cells[cell_id]; + auto & cell = cells[cell_id]; if (cell.pos >= 0 && last_pos != cell.pos + (llama_pos) n_seq_tokens) { // What should happen when the pos backtracks or skips a value? // Clearing the state mid-batch would require special-casing which isn't done. LLAMA_LOG_WARN("%s: non-consecutive token position %d after %d for sequence %d with %u new tokens\n", - __func__, last_pos, cell.pos, ubatch.seq_id[s][0], n_seq_tokens); + __func__, last_pos, cell.pos, ubatch.seq_id[i][0], n_seq_tokens); } cell.pos = last_pos; cell.seq_id.clear(); - for (int32_t j = 0; j < ubatch.n_seq_id[s]; ++j) { - const llama_seq_id seq_id = ubatch.seq_id[s][j]; + for (int32_t j = 0; j < ubatch.n_seq_id[i]; ++j) { + const llama_seq_id seq_id = ubatch.seq_id[i][j]; cell.seq_id.insert(seq_id); cells[seq_id].tail = cell_id; } @@ -622,18 +636,18 @@ bool llama_kv_cache_recurrent::find_slot(const llama_ubatch & ubatch) { head = min; n = max - min + 1; used = std::count_if(cells.begin(), cells.end(), - [](const kv_cell & cell){ return !cell.is_empty(); }); + [](const mem_cell & cell){ return !cell.is_empty(); }); // sanity check return n >= n_seqs; } -bool llama_kv_cache_recurrent::get_can_shift() const { +bool llama_memory_recurrent::get_can_shift() const { // shifting the pos is trivial for recurrent models return true; } -size_t llama_kv_cache_recurrent::total_size() const { +size_t llama_memory_recurrent::total_size() const { size_t size = 0; for (const auto & buf : bufs) { size += ggml_backend_buffer_get_size(buf.get()); @@ -642,27 +656,31 @@ size_t llama_kv_cache_recurrent::total_size() const { return size; } -size_t llama_kv_cache_recurrent::size_k_bytes() const { - size_t size_k_bytes = 0; +size_t llama_memory_recurrent::size_r_bytes() const { + size_t size_r_bytes = 0; - for (const auto & k : k_l) { - size_k_bytes += 
ggml_nbytes(k); + for (const auto & r : r_l) { + if (r != nullptr) { + size_r_bytes += ggml_nbytes(r); + } } - return size_k_bytes; + return size_r_bytes; } -size_t llama_kv_cache_recurrent::size_v_bytes() const { - size_t size_v_bytes = 0; +size_t llama_memory_recurrent::size_s_bytes() const { + size_t size_s_bytes = 0; - for (const auto & v : v_l) { - size_v_bytes += ggml_nbytes(v); + for (const auto & s : s_l) { + if (s != nullptr) { + size_s_bytes += ggml_nbytes(s); + } } - return size_v_bytes; + return size_s_bytes; } -void llama_kv_cache_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const { +void llama_memory_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const { std::vector> cell_ranges; // ranges, from inclusive, to exclusive uint32_t cell_count = 0; @@ -700,7 +718,7 @@ void llama_kv_cache_recurrent::state_write(llama_io_write_i & io, llama_seq_id s state_write_data(io, cell_ranges); } -void llama_kv_cache_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq_id) { +void llama_memory_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq_id) { uint32_t cell_count; io.read_to(&cell_count, sizeof(cell_count)); @@ -719,7 +737,7 @@ void llama_kv_cache_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq } } -void llama_kv_cache_recurrent::state_write_meta(llama_io_write_i & io, const std::vector> & cell_ranges, llama_seq_id seq_id) const { +void llama_memory_recurrent::state_write_meta(llama_io_write_i & io, const std::vector> & cell_ranges, llama_seq_id seq_id) const { for (const auto & range : cell_ranges) { for (uint32_t i = range.first; i < range.second; ++i) { const auto & cell = cells[i]; @@ -738,98 +756,93 @@ void llama_kv_cache_recurrent::state_write_meta(llama_io_write_i & io, const std } } -void llama_kv_cache_recurrent::state_write_data(llama_io_write_i & io, const std::vector> & cell_ranges) const { - const uint32_t v_trans = 0; +void 
llama_memory_recurrent::state_write_data(llama_io_write_i & io, const std::vector> & cell_ranges) const { + const uint32_t s_trans = 0; const uint32_t n_layer = hparams.n_layer; - io.write(&v_trans, sizeof(v_trans)); - io.write(&n_layer, sizeof(n_layer)); + io.write(&s_trans, sizeof(s_trans)); + io.write(&n_layer, sizeof(n_layer)); std::vector tmp_buf; // Iterate and write all the keys first, each row is a cell // Get whole range at a time for (uint32_t il = 0; il < n_layer; ++il) { - const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(); // Write key type - const int32_t k_type_i = (int32_t)k_l[il]->type; - io.write(&k_type_i, sizeof(k_type_i)); + const int32_t r_type_i = (int32_t)r_l[il]->type; + io.write(&r_type_i, sizeof(r_type_i)); // Write row size of key - const uint64_t k_size_row = ggml_row_size(k_l[il]->type, n_embd_k_gqa); - io.write(&k_size_row, sizeof(k_size_row)); + const uint64_t r_size_row = ggml_row_size(r_l[il]->type, hparams.n_embd_r()); + io.write(&r_size_row, sizeof(r_size_row)); // Read each range of cells of k_size length each into tmp_buf and write out for (const auto & range : cell_ranges) { const size_t range_size = range.second - range.first; - const size_t buf_size = range_size * k_size_row; - io.write_tensor(k_l[il], range.first * k_size_row, buf_size); + const size_t buf_size = range_size * r_size_row; + io.write_tensor(r_l[il], range.first * r_size_row, buf_size); } } - if (!v_trans) { + if (!s_trans) { for (uint32_t il = 0; il < n_layer; ++il) { - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); // Write value type - const int32_t v_type_i = (int32_t)v_l[il]->type; - io.write(&v_type_i, sizeof(v_type_i)); + const int32_t s_type_i = (int32_t)s_l[il]->type; + io.write(&s_type_i, sizeof(s_type_i)); // Write row size of value - const uint64_t v_size_row = ggml_row_size(v_l[il]->type, n_embd_v_gqa); - io.write(&v_size_row, sizeof(v_size_row)); + const uint64_t s_size_row = 
ggml_row_size(s_l[il]->type, hparams.n_embd_s()); + io.write(&s_size_row, sizeof(s_size_row)); - // Read each range of cells of v_size length each into tmp_buf and write out + // Read each range of cells of s_size length each into tmp_buf and write out for (const auto & range : cell_ranges) { const size_t range_size = range.second - range.first; - const size_t buf_size = range_size * v_size_row; - io.write_tensor(v_l[il], range.first * v_size_row, buf_size); + const size_t buf_size = range_size * s_size_row; + io.write_tensor(s_l[il], range.first * s_size_row, buf_size); } } } else { // When v is transposed, we also need the element size and get the element ranges from each row - const uint32_t kv_size = size; + const uint32_t mem_size = size; for (uint32_t il = 0; il < n_layer; ++il) { - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); + const uint32_t n_embd_s = hparams.n_embd_s(); // Write value type - const int32_t v_type_i = (int32_t)v_l[il]->type; - io.write(&v_type_i, sizeof(v_type_i)); + const int32_t s_type_i = (int32_t)s_l[il]->type; + io.write(&s_type_i, sizeof(s_type_i)); // Write element size - const uint32_t v_size_el = ggml_type_size(v_l[il]->type); - io.write(&v_size_el, sizeof(v_size_el)); + const uint32_t s_size_el = ggml_type_size(s_l[il]->type); + io.write(&s_size_el, sizeof(s_size_el)); // Write GQA embedding size - io.write(&n_embd_v_gqa, sizeof(n_embd_v_gqa)); + io.write(&n_embd_s, sizeof(n_embd_s)); // For each row, we get the element values of each cell - for (uint32_t j = 0; j < n_embd_v_gqa; ++j) { + for (uint32_t j = 0; j < n_embd_s; ++j) { // Read each range of cells of v_size_el length each into tmp_buf and write out for (const auto & range : cell_ranges) { const size_t range_size = range.second - range.first; - const size_t src_offset = (range.first + j * kv_size) * v_size_el; - const size_t buf_size = range_size * v_size_el; - io.write_tensor(v_l[il], src_offset, buf_size); + const size_t src_offset = 
(range.first + j * mem_size) * s_size_el; + const size_t buf_size = range_size * s_size_el; + io.write_tensor(s_l[il], src_offset, buf_size); } } } } } -bool llama_kv_cache_recurrent::state_read_meta(llama_io_read_i & io, uint32_t cell_count, llama_seq_id dest_seq_id) { +bool llama_memory_recurrent::state_read_meta(llama_io_read_i & io, uint32_t cell_count, llama_seq_id dest_seq_id) { if (dest_seq_id != -1) { // single sequence seq_rm(dest_seq_id, -1, -1); - llama_sbatch sbatch; - llama_ubatch batch = sbatch.reserve_ubatch(cell_count, /* has_embd */ false); + llama_batch_allocr balloc(hparams.n_pos_per_embd()); - batch.n_tokens = cell_count; - batch.n_seq_tokens = cell_count; - batch.n_seqs = 1; + llama_ubatch ubatch = balloc.ubatch_reserve(cell_count, 1); for (uint32_t i = 0; i < cell_count; ++i) { llama_pos pos; @@ -843,12 +856,12 @@ bool llama_kv_cache_recurrent::state_read_meta(llama_io_read_i & io, uint32_t ce return false; } - batch.pos[i] = pos; + ubatch.pos[i] = pos; } - batch.n_seq_id[0] = 1; - batch.seq_id[0] = &dest_seq_id; + ubatch.n_seq_id[0] = 1; + ubatch.seq_id[0] = &dest_seq_id; - if (!find_slot(batch)) { + if (!find_slot(ubatch)) { LLAMA_LOG_ERROR("%s: failed to find available cells in kv cache\n", __func__); return false; } @@ -856,8 +869,8 @@ bool llama_kv_cache_recurrent::state_read_meta(llama_io_read_i & io, uint32_t ce // DEBUG CHECK: kv.head should be our first cell, kv.head + cell_count - 1 should be our last cell (verify seq_id and pos values) // Assume that this is one contiguous block of cells GGML_ASSERT(head + cell_count <= size); - GGML_ASSERT(cells[head].pos == batch.pos[0]); - GGML_ASSERT(cells[head + cell_count - 1].pos == batch.pos[cell_count - 1]); + GGML_ASSERT(cells[head].pos == ubatch.pos[0]); + GGML_ASSERT(cells[head + cell_count - 1].pos == ubatch.pos[cell_count - 1]); GGML_ASSERT(cells[head].has_seq_id(dest_seq_id)); GGML_ASSERT(cells[head + cell_count - 1].has_seq_id(dest_seq_id)); } else { @@ -871,7 +884,7 @@ bool 
llama_kv_cache_recurrent::state_read_meta(llama_io_read_i & io, uint32_t ce clear(true); for (uint32_t i = 0; i < cell_count; ++i) { - kv_cell & cell = cells[i]; + auto & cell = cells[i]; llama_pos pos; uint32_t n_seq_id; @@ -885,7 +898,7 @@ bool llama_kv_cache_recurrent::state_read_meta(llama_io_read_i & io, uint32_t ce llama_seq_id seq_id; io.read_to(&seq_id, sizeof(seq_id)); - // TODO: llama_kv_cache_recurrent should have a notion of max sequences + // TODO: llama_memory_recurrent should have a notion of max sequences //if (seq_id < 0 || (uint32_t) seq_id >= llama_n_seq_max(ctx)) { if (seq_id < 0) { //LLAMA_LOG_ERROR("%s: invalid seq_id, %d is out of range [0, %u)\n", __func__, seq_id, llama_n_seq_max(ctx)); @@ -917,10 +930,10 @@ bool llama_kv_cache_recurrent::state_read_meta(llama_io_read_i & io, uint32_t ce return true; } -bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t cell_count) { - uint32_t v_trans; +bool llama_memory_recurrent::state_read_data(llama_io_read_i & io, uint32_t cell_count) { + uint32_t s_trans; uint32_t n_layer; - io.read_to(&v_trans, sizeof(v_trans)); + io.read_to(&s_trans, sizeof(s_trans)); io.read_to(&n_layer, sizeof(n_layer)); if (n_layer != hparams.n_layer) { @@ -931,102 +944,100 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce LLAMA_LOG_ERROR("%s: not enough cells in kv cache to restore state (%u > %u)\n", __func__, cell_count, size); return false; } - if (false != (bool) v_trans) { - LLAMA_LOG_ERROR("%s: incompatible V transposition\n", __func__); + if (false != (bool) s_trans) { + LLAMA_LOG_ERROR("%s: incompatible s transposition\n", __func__); return false; } // For each layer, read the keys for each cell, one row is one cell, read as one contiguous block for (uint32_t il = 0; il < n_layer; ++il) { - const uint32_t n_embd_k_gqa = hparams.n_embd_k_gqa(il) + hparams.n_embd_k_s(); // Read type of key - int32_t k_type_i_ref; - io.read_to(&k_type_i_ref, 
sizeof(k_type_i_ref)); - const int32_t k_type_i = (int32_t) k_l[il]->type; - if (k_type_i != k_type_i_ref) { - LLAMA_LOG_ERROR("%s: mismatched key type (%d != %d, layer %d)\n", __func__, k_type_i, k_type_i_ref, il); + int32_t r_type_i_ref; + io.read_to(&r_type_i_ref, sizeof(r_type_i_ref)); + const int32_t r_type_i = (int32_t) r_l[il]->type; + if (r_type_i != r_type_i_ref) { + LLAMA_LOG_ERROR("%s: mismatched r type (%d != %d, layer %d)\n", __func__, r_type_i, r_type_i_ref, il); return false; } // Read row size of key - uint64_t k_size_row_ref; - io.read_to(&k_size_row_ref, sizeof(k_size_row_ref)); - const size_t k_size_row = ggml_row_size(k_l[il]->type, n_embd_k_gqa); - if (k_size_row != k_size_row_ref) { - LLAMA_LOG_ERROR("%s: mismatched key row size (%zu != %zu, layer %d)\n", __func__, k_size_row, (size_t) k_size_row_ref, il); + uint64_t r_size_row_ref; + io.read_to(&r_size_row_ref, sizeof(r_size_row_ref)); + const size_t r_size_row = ggml_row_size(r_l[il]->type, hparams.n_embd_r()); + if (r_size_row != r_size_row_ref) { + LLAMA_LOG_ERROR("%s: mismatched r row size (%zu != %zu, layer %d)\n", __func__, r_size_row, (size_t) r_size_row_ref, il); return false; } if (cell_count) { // Read and set the keys for the whole cell range - ggml_backend_tensor_set(k_l[il], io.read(cell_count * k_size_row), head * k_size_row, cell_count * k_size_row); + ggml_backend_tensor_set(r_l[il], io.read(cell_count * r_size_row), head * r_size_row, cell_count * r_size_row); } } - if (!v_trans) { + if (!s_trans) { for (uint32_t il = 0; il < n_layer; ++il) { - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); // Read type of value - int32_t v_type_i_ref; - io.read_to(&v_type_i_ref, sizeof(v_type_i_ref)); - const int32_t v_type_i = (int32_t)v_l[il]->type; - if (v_type_i != v_type_i_ref) { - LLAMA_LOG_ERROR("%s: mismatched value type (%d != %d, layer %d)\n", __func__, v_type_i, v_type_i_ref, il); + int32_t s_type_i_ref; + io.read_to(&s_type_i_ref, 
sizeof(s_type_i_ref)); + const int32_t s_type_i = (int32_t)s_l[il]->type; + if (s_type_i != s_type_i_ref) { + LLAMA_LOG_ERROR("%s: mismatched s type (%d != %d, layer %d)\n", __func__, s_type_i, s_type_i_ref, il); return false; } // Read row size of value - uint64_t v_size_row_ref; - io.read_to(&v_size_row_ref, sizeof(v_size_row_ref)); - const size_t v_size_row = ggml_row_size(v_l[il]->type, n_embd_v_gqa); - if (v_size_row != v_size_row_ref) { - LLAMA_LOG_ERROR("%s: mismatched value row size (%zu != %zu, layer %d)\n", __func__, v_size_row, (size_t) v_size_row_ref, il); + uint64_t s_size_row_ref; + io.read_to(&s_size_row_ref, sizeof(s_size_row_ref)); + const size_t s_size_row = ggml_row_size(s_l[il]->type, hparams.n_embd_s()); + if (s_size_row != s_size_row_ref) { + LLAMA_LOG_ERROR("%s: mismatched s row size (%zu != %zu, layer %d)\n", __func__, s_size_row, (size_t) s_size_row_ref, il); return false; } if (cell_count) { // Read and set the values for the whole cell range - ggml_backend_tensor_set(v_l[il], io.read(cell_count * v_size_row), head * v_size_row, cell_count * v_size_row); + ggml_backend_tensor_set(s_l[il], io.read(cell_count * s_size_row), head * s_size_row, cell_count * s_size_row); } } } else { // For each layer, read the values for each cell (transposed) for (uint32_t il = 0; il < n_layer; ++il) { - const uint32_t n_embd_v_gqa = hparams.n_embd_v_gqa(il) + hparams.n_embd_v_s(); + const uint32_t n_embd_s = hparams.n_embd_s(); // Read type of value - int32_t v_type_i_ref; - io.read_to(&v_type_i_ref, sizeof(v_type_i_ref)); - const int32_t v_type_i = (int32_t)v_l[il]->type; - if (v_type_i != v_type_i_ref) { - LLAMA_LOG_ERROR("%s: mismatched value type (%d != %d, layer %d)\n", __func__, v_type_i, v_type_i_ref, il); + int32_t s_type_i_ref; + io.read_to(&s_type_i_ref, sizeof(s_type_i_ref)); + const int32_t s_type_i = (int32_t)s_l[il]->type; + if (s_type_i != s_type_i_ref) { + LLAMA_LOG_ERROR("%s: mismatched s type (%d != %d, layer %d)\n", __func__, s_type_i, 
s_type_i_ref, il); return false; } // Read element size of value - uint32_t v_size_el_ref; - io.read_to(&v_size_el_ref, sizeof(v_size_el_ref)); - const size_t v_size_el = ggml_type_size(v_l[il]->type); - if (v_size_el != v_size_el_ref) { - LLAMA_LOG_ERROR("%s: mismatched value element size (%zu != %zu, layer %d)\n", __func__, v_size_el, (size_t) v_size_el_ref, il); + uint32_t s_size_el_ref; + io.read_to(&s_size_el_ref, sizeof(s_size_el_ref)); + const size_t s_size_el = ggml_type_size(s_l[il]->type); + if (s_size_el != s_size_el_ref) { + LLAMA_LOG_ERROR("%s: mismatched s element size (%zu != %zu, layer %d)\n", __func__, s_size_el, (size_t) s_size_el_ref, il); return false; } - // Read GQA embedding size - uint32_t n_embd_v_gqa_ref; - io.read_to(&n_embd_v_gqa_ref, sizeof(n_embd_v_gqa_ref)); - if (n_embd_v_gqa != n_embd_v_gqa_ref) { - LLAMA_LOG_ERROR("%s: mismatched GQA embedding size (%u != %u, layer %d)\n", __func__, n_embd_v_gqa, n_embd_v_gqa_ref, il); + // Read state embedding size + uint32_t n_embd_s_ref; + io.read_to(&n_embd_s_ref, sizeof(n_embd_s_ref)); + if (n_embd_s != n_embd_s_ref) { + LLAMA_LOG_ERROR("%s: mismatched s embedding size (%u != %u, layer %d)\n", __func__, n_embd_s, n_embd_s_ref, il); return false; } if (cell_count) { // For each row in the transposed matrix, read the values for the whole cell range - for (uint32_t j = 0; j < n_embd_v_gqa; ++j) { - const size_t dst_offset = (head + j * size) * v_size_el; - ggml_backend_tensor_set(v_l[il], io.read(cell_count * v_size_el), dst_offset, cell_count * v_size_el); + for (uint32_t j = 0; j < n_embd_s; ++j) { + const size_t dst_offset = (head + j * size) * s_size_el; + ggml_backend_tensor_set(s_l[il], io.read(cell_count * s_size_el), dst_offset, cell_count * s_size_el); } } } @@ -1036,25 +1047,22 @@ bool llama_kv_cache_recurrent::state_read_data(llama_io_read_i & io, uint32_t ce } // -// llama_kv_cache_recurrent_state +// llama_memory_recurrent_context // 
-llama_kv_cache_recurrent_state::llama_kv_cache_recurrent_state(llama_memory_status status) : status(status) {} +llama_memory_recurrent_context::llama_memory_recurrent_context(llama_memory_status status) : status(status) {} -llama_kv_cache_recurrent_state::llama_kv_cache_recurrent_state( - llama_memory_status status, - llama_kv_cache_recurrent * kv) : status(status), kv(kv), is_full(true) { +llama_memory_recurrent_context::llama_memory_recurrent_context( + llama_memory_recurrent * mem) : status(LLAMA_MEMORY_STATUS_SUCCESS), mem(mem), is_full(true) { } -llama_kv_cache_recurrent_state::llama_kv_cache_recurrent_state( - llama_memory_status status, - llama_kv_cache_recurrent * kv, - llama_sbatch sbatch, - std::vector ubatches) : status(status), kv(kv), sbatch(std::move(sbatch)), ubatches(std::move(ubatches)) {} +llama_memory_recurrent_context::llama_memory_recurrent_context( + llama_memory_recurrent * mem, + std::vector ubatches) : status(LLAMA_MEMORY_STATUS_SUCCESS), mem(mem), ubatches(std::move(ubatches)) {} -llama_kv_cache_recurrent_state::~llama_kv_cache_recurrent_state() = default; +llama_memory_recurrent_context::~llama_memory_recurrent_context() = default; -bool llama_kv_cache_recurrent_state::next() { +bool llama_memory_recurrent_context::next() { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); if (++i_next >= ubatches.size()) { @@ -1064,54 +1072,56 @@ bool llama_kv_cache_recurrent_state::next() { return true; } -bool llama_kv_cache_recurrent_state::apply() { - assert(status == LLAMA_MEMORY_STATUS_SUCCESS); +bool llama_memory_recurrent_context::apply() { + assert(!llama_memory_status_is_fail(status)); - kv->find_slot(ubatches[i_next]); + // no ubatches -> this is an update + if (ubatches.empty()) { + // recurrent cache never performs updates + assert(status == LLAMA_MEMORY_STATUS_NO_UPDATE); - return true; -} + return true; + } -std::vector & llama_kv_cache_recurrent_state::out_ids() { - assert(status == LLAMA_MEMORY_STATUS_SUCCESS); + 
mem->find_slot(ubatches[i_next]); - return sbatch.out_ids; + return true; } -llama_memory_status llama_kv_cache_recurrent_state::get_status() const { +llama_memory_status llama_memory_recurrent_context::get_status() const { return status; } -const llama_ubatch & llama_kv_cache_recurrent_state::get_ubatch() const { +const llama_ubatch & llama_memory_recurrent_context::get_ubatch() const { assert(status == LLAMA_MEMORY_STATUS_SUCCESS); return ubatches[i_next]; } -uint32_t llama_kv_cache_recurrent_state::get_n_kv() const { - return is_full ? kv->size : kv->n; +uint32_t llama_memory_recurrent_context::get_n_rs() const { + return is_full ? mem->size : mem->n; } -uint32_t llama_kv_cache_recurrent_state::get_head() const { - return is_full ? 0 : kv->head; +uint32_t llama_memory_recurrent_context::get_head() const { + return is_full ? 0 : mem->head; } -int32_t llama_kv_cache_recurrent_state::get_rs_z() const { - return is_full ? 0 : kv->rs_z; +int32_t llama_memory_recurrent_context::get_rs_z() const { + return is_full ? 
0 : mem->rs_z; } -uint32_t llama_kv_cache_recurrent_state::get_size() const { - return kv->size; +uint32_t llama_memory_recurrent_context::get_size() const { + return mem->size; } -ggml_tensor * llama_kv_cache_recurrent_state::get_k_l(int32_t il) const { - return kv->k_l[il]; +ggml_tensor * llama_memory_recurrent_context::get_r_l(int32_t il) const { + return mem->r_l[il]; } -ggml_tensor * llama_kv_cache_recurrent_state::get_v_l(int32_t il) const { - return kv->v_l[il]; +ggml_tensor * llama_memory_recurrent_context::get_s_l(int32_t il) const { + return mem->s_l[il]; } -int32_t llama_kv_cache_recurrent_state::s_copy(int i) const { - return kv->cells[i + kv->head].src0; +int32_t llama_memory_recurrent_context::s_copy(int i) const { + return mem->cells[i + mem->head].src0; } diff --git a/src/llama-kv-cache-recurrent.h b/src/llama-memory-recurrent.h similarity index 63% rename from src/llama-kv-cache-recurrent.h rename to src/llama-memory-recurrent.h index d7c02ea872160..4d094f9a05788 100644 --- a/src/llama-kv-cache-recurrent.h +++ b/src/llama-memory-recurrent.h @@ -8,35 +8,40 @@ #include // -// llama_kv_cache_recurrent +// llama_memory_recurrent // -// TODO: extract the KV cache state used for graph computation into llama_kv_cache_recurrent_state_i -// see the implementation of llama_kv_cache_unified_state_i for an example how to do it -class llama_kv_cache_recurrent : public llama_memory_i { +// TODO: extract the cache state used for graph computation into llama_memory_recurrent_context_i +// see the implementation of llama_kv_cache_unified_context_i for an example how to do it +class llama_memory_recurrent : public llama_memory_i { public: - llama_kv_cache_recurrent( - const llama_model & model, - ggml_type type_k, - ggml_type type_v, - bool offload, - uint32_t kv_size, - uint32_t n_seq_max); - ~llama_kv_cache_recurrent() = default; + // this callback is used to filter out layers that should not be included in the cache + using layer_filter_cb = std::function; + + 
llama_memory_recurrent( + const llama_model & model, + layer_filter_cb && filter, + ggml_type type_r, + ggml_type type_s, + bool offload, + uint32_t mem_size, + uint32_t n_seq_max); + + ~llama_memory_recurrent() = default; // // llama_memory_i // - llama_memory_state_ptr init_batch( - const llama_batch & batch, + llama_memory_context_ptr init_batch( + llama_batch_allocr & balloc, uint32_t n_ubatch, - bool embd_pooled) override; + bool embd_all) override; - llama_memory_state_ptr init_full() override; + llama_memory_context_ptr init_full() override; - llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) override; + llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) override; void clear(bool data) override; @@ -51,7 +56,7 @@ class llama_kv_cache_recurrent : public llama_memory_i { bool prepare(const std::vector & ubatches); - // find a contiguous slot of kv cells and emplace the ubatch there + // find a contiguous slot of memory cells and emplace the ubatch there bool find_slot(const llama_ubatch & ubatch); bool get_can_shift() const override; @@ -72,7 +77,7 @@ class llama_kv_cache_recurrent : public llama_memory_i { int32_t rs_z = -1; // TODO: optimize for recurrent state needs - struct kv_cell { + struct mem_cell { llama_pos pos = -1; int32_t src = -1; // used to know where states should be copied from int32_t src0 = -1; // like src, but only used when setting the inputs (allowing to copy once) @@ -88,15 +93,16 @@ class llama_kv_cache_recurrent : public llama_memory_i { return seq_id.empty(); } - bool is_same_seq(const kv_cell & other) const { + bool is_same_seq(const mem_cell & other) const { return seq_id == other.seq_id; } }; - std::vector cells; + std::vector cells; - std::vector k_l; // per layer - std::vector v_l; + // per layer + std::vector r_l; + std::vector s_l; private: //const llama_model & model; @@ -109,8 +115,8 @@ class llama_kv_cache_recurrent : public llama_memory_i { size_t total_size() const; - size_t 
size_k_bytes() const; - size_t size_v_bytes() const; + size_t size_r_bytes() const; + size_t size_s_bytes() const; void state_write_meta(llama_io_write_i & io, const std::vector> & cell_ranges, llama_seq_id seq_id = -1) const; void state_write_data(llama_io_write_i & io, const std::vector> & cell_ranges) const; @@ -119,57 +125,50 @@ class llama_kv_cache_recurrent : public llama_memory_i { bool state_read_data(llama_io_read_i & io, uint32_t cell_count); }; -class llama_kv_cache_recurrent_state : public llama_memory_state_i { +class llama_memory_recurrent_context : public llama_memory_context_i { public: // used for errors - llama_kv_cache_recurrent_state(llama_memory_status status); - - // used to create a full-cache state - llama_kv_cache_recurrent_state( - llama_memory_status status, - llama_kv_cache_recurrent * kv); - - // used to create a state from a batch - llama_kv_cache_recurrent_state( - llama_memory_status status, - llama_kv_cache_recurrent * kv, - llama_sbatch sbatch, + llama_memory_recurrent_context(llama_memory_status status); + + // used to create a full-cache or update context + llama_memory_recurrent_context( + llama_memory_recurrent * mem); + + // used to create a batch processing context from a batch + llama_memory_recurrent_context( + llama_memory_recurrent * mem, std::vector ubatches); - virtual ~llama_kv_cache_recurrent_state(); + virtual ~llama_memory_recurrent_context(); // - // llama_memory_state_i + // llama_memory_context_i // bool next() override; bool apply() override; - std::vector & out_ids() override; - llama_memory_status get_status() const override; const llama_ubatch & get_ubatch() const override; // - // llama_kv_cache_recurrent_state specific API + // llama_memory_recurrent_context specific API // - uint32_t get_n_kv() const; + uint32_t get_n_rs() const; uint32_t get_head() const; int32_t get_rs_z() const; uint32_t get_size() const; - ggml_tensor * get_k_l(int32_t il) const; - ggml_tensor * get_v_l(int32_t il) const; + ggml_tensor 
* get_r_l(int32_t il) const; + ggml_tensor * get_s_l(int32_t il) const; int32_t s_copy(int i) const; private: const llama_memory_status status; - llama_kv_cache_recurrent * kv; - - llama_sbatch sbatch; + llama_memory_recurrent * mem; size_t i_next = 0; diff --git a/src/llama-memory.cpp b/src/llama-memory.cpp index f1107672c6476..ca6844c32a767 100644 --- a/src/llama-memory.cpp +++ b/src/llama-memory.cpp @@ -40,3 +40,20 @@ llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_me // if either status has an update, then the combined status has an update return has_update ? LLAMA_MEMORY_STATUS_SUCCESS : LLAMA_MEMORY_STATUS_NO_UPDATE; } + +bool llama_memory_status_is_fail(llama_memory_status status) { + switch (status) { + case LLAMA_MEMORY_STATUS_SUCCESS: + case LLAMA_MEMORY_STATUS_NO_UPDATE: + { + return false; + } + case LLAMA_MEMORY_STATUS_FAILED_PREPARE: + case LLAMA_MEMORY_STATUS_FAILED_COMPUTE: + { + return true; + } + } + + return false; +} diff --git a/src/llama-memory.h b/src/llama-memory.h index 42e226dc0ed61..e8ba336e8525d 100644 --- a/src/llama-memory.h +++ b/src/llama-memory.h @@ -3,10 +3,11 @@ #include "llama.h" #include -#include struct llama_ubatch; +class llama_batch_allocr; + class llama_io_write_i; class llama_io_read_i; @@ -26,23 +27,24 @@ enum llama_memory_status { LLAMA_MEMORY_STATUS_FAILED_COMPUTE, }; -// helper function for combining the status of two memory states +// helper function for combining the status of two memory contexts // useful for implementing hybrid memory types (e.g. iSWA) llama_memory_status llama_memory_status_combine(llama_memory_status s0, llama_memory_status s1); -// the interface for managing the memory state during batch processing +// helper function for checking if a memory status indicates a failure +bool llama_memory_status_is_fail(llama_memory_status status); + +// the interface for managing the memory context during batch processing // this interface is implemented per memory type. 
see: -// - llama_kv_cache_unified_state -// - llama_kv_cache_unified_iswa_state +// - llama_kv_cache_unified_context +// - llama_kv_cache_unified_iswa_context // ... // -// the only method that can mutate the memory and the memory state is llama_memory_i::apply() -// -// TODO: rename to llama_memory_context_i ? -struct llama_memory_state_i { - virtual ~llama_memory_state_i() = default; +// the only method that should mutate the memory and the memory context is llama_memory_i::apply() +struct llama_memory_context_i { + virtual ~llama_memory_context_i() = default; - // consume the current ubatch from the state and proceed to the next one + // consume the current ubatch from the context and proceed to the next one // return false if we are done virtual bool next() = 0; @@ -50,17 +52,14 @@ struct llama_memory_state_i { // return false on failure virtual bool apply() = 0; - // TODO: this might get reworked in the future when refactoring llama_batch - virtual std::vector & out_ids() = 0; - // get the current ubatch virtual const llama_ubatch & get_ubatch() const = 0; - // get the status of the memory state - used for error handling and checking if any updates would be applied + // get the status of the memory context - used for error handling and checking if any updates would be applied virtual llama_memory_status get_status() const = 0; }; -using llama_memory_state_ptr = std::unique_ptr; +using llama_memory_context_ptr = std::unique_ptr; // general concept of LLM memory // the KV cache is a type of LLM memory, but there can be other types @@ -68,19 +67,19 @@ struct llama_memory_i { virtual ~llama_memory_i() = default; // split the input batch into a set of ubatches and verify that they can fit into the cache - // return a state object containing the ubatches and KV cache state required to process them - // check the llama_memory_state_i::get_status() for the result - virtual llama_memory_state_ptr init_batch( - const llama_batch & batch, + // return a context object 
containing the ubatches and memory state required to process them + // check the llama_memory_context_i::get_status() for the result + virtual llama_memory_context_ptr init_batch( + llama_batch_allocr & balloc, uint32_t n_ubatch, - bool embd_pooled) = 0; + bool embd_all) = 0; // simulate full cache, used for allocating worst-case compute buffers - virtual llama_memory_state_ptr init_full() = 0; + virtual llama_memory_context_ptr init_full() = 0; // prepare for any pending memory updates, such as shifts, defrags, etc. // status == LLAMA_MEMORY_STATUS_NO_UPDATE if there is nothing to update - virtual llama_memory_state_ptr init_update(llama_context * lctx, bool optimize) = 0; + virtual llama_memory_context_ptr init_update(llama_context * lctx, bool optimize) = 0; // getters virtual bool get_can_shift() const = 0; diff --git a/src/llama-model-saver.cpp b/src/llama-model-saver.cpp index a70b9892347cb..563823dc35d8e 100644 --- a/src/llama-model-saver.cpp +++ b/src/llama-model-saver.cpp @@ -228,6 +228,7 @@ void llama_model_saver::add_kv_from_model() { // add_kv(LLM_KV_TOKENIZER_MASK_ID, ???); add_kv(LLM_KV_TOKENIZER_ADD_BOS, vocab.get_add_bos()); add_kv(LLM_KV_TOKENIZER_ADD_EOS, vocab.get_add_eos()); + add_kv(LLM_KV_TOKENIZER_ADD_SEP, vocab.get_add_sep()); add_kv(LLM_KV_TOKENIZER_ADD_PREFIX, vocab.get_add_space_prefix()); add_kv(LLM_KV_TOKENIZER_REMOVE_EXTRA_WS, vocab.get_remove_extra_whitespaces()); add_kv(LLM_KV_TOKENIZER_PRECOMPILED_CHARSMAP, vocab.get_precompiled_charsmap()); diff --git a/src/llama-model.cpp b/src/llama-model.cpp index c64bf9de939f4..589d95936b14d 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -8,7 +8,8 @@ #include "llama-kv-cache-unified.h" #include "llama-kv-cache-unified-iswa.h" -#include "llama-kv-cache-recurrent.h" +#include "llama-memory-hybrid.h" +#include "llama-memory-recurrent.h" #include "ggml-cpp.h" @@ -39,16 +40,21 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_190M: return "190M"; case LLM_TYPE_220M: return 
"220M"; case LLM_TYPE_250M: return "250M"; + case LLM_TYPE_256M: return "256M"; case LLM_TYPE_270M: return "270M"; case LLM_TYPE_335M: return "335M"; + case LLM_TYPE_350M: return "350M"; case LLM_TYPE_410M: return "410M"; case LLM_TYPE_450M: return "450M"; case LLM_TYPE_475M: return "475M"; + case LLM_TYPE_700M: return "700M"; case LLM_TYPE_770M: return "770M"; case LLM_TYPE_780M: return "780M"; + case LLM_TYPE_0_3B: return "0.3B"; case LLM_TYPE_0_5B: return "0.5B"; case LLM_TYPE_0_6B: return "0.6B"; case LLM_TYPE_1B: return "1B"; + case LLM_TYPE_1_2B: return "1.2B"; case LLM_TYPE_1_3B: return "1.3B"; case LLM_TYPE_1_4B: return "1.4B"; case LLM_TYPE_1_5B: return "1.5B"; @@ -80,6 +86,7 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_40B: return "40B"; case LLM_TYPE_65B: return "65B"; case LLM_TYPE_70B: return "70B"; + case LLM_TYPE_142B: return "142B"; case LLM_TYPE_236B: return "236B"; case LLM_TYPE_290B: return "290B"; case LLM_TYPE_314B: return "314B"; @@ -99,8 +106,13 @@ const char * llm_type_name(llm_type type) { case LLM_TYPE_57B_A14B: return "57B.A14B"; case LLM_TYPE_17B_16E: return "17Bx16E (Scout)"; case LLM_TYPE_17B_128E: return "17Bx128E (Maverick)"; + case LLM_TYPE_A13B: return "A13B"; + case LLM_TYPE_21B_A3B: return "21B.A3B"; case LLM_TYPE_30B_A3B: return "30B.A3B"; case LLM_TYPE_235B_A22B: return "235B.A22B"; + case LLM_TYPE_300B_A47B: return "300B.A47B"; + case LLM_TYPE_E2B: return "E2B"; + case LLM_TYPE_E4B: return "E4B"; default: return "?B"; } } @@ -203,23 +215,27 @@ static bool weight_buft_supported(const llama_hparams & hparams, ggml_tensor * w } break; case GGML_OP_SSM_CONV: { - // FIXME - ggml_tensor * conv_x = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 12345, w->ne[1], 6789); + const int64_t n_seq_tokens = 512; + const int64_t n_seqs = 3; + ggml_tensor * conv_x = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, w->ne[0] - 1 + n_seq_tokens, w->ne[1], n_seqs); op_tensor = ggml_ssm_conv(ctx, conv_x, w); } break; case GGML_OP_SSM_SCAN: { - // 
FIXME - const int64_t d_state = w->ne[0]; - const int64_t d_inner = w->ne[1]; + // w is ssm_a, which is used to distinguish Mamba-1 and Mamba-2 + const int64_t d_state = w->ne[0] == 1 ? hparams.ssm_d_state : w->ne[0]; + const int64_t n_head = w->ne[1]; + const int64_t head_dim = hparams.ssm_d_inner / n_head; + const int64_t n_group = hparams.ssm_n_group ? hparams.ssm_n_group : 1; const int64_t n_seq_tokens = 512; - const int64_t n_seqs = 1; - ggml_tensor * s = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_state, d_inner, n_seqs); - ggml_tensor * x = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_seq_tokens, n_seqs); - ggml_tensor * dt = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_inner, n_seq_tokens, n_seqs); - ggml_tensor * B = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_state, n_seq_tokens, n_seqs); - ggml_tensor * C = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, d_state, n_seq_tokens, n_seqs); - op_tensor = ggml_ssm_scan(ctx, s, x, dt, w, B, C); + const int64_t n_seqs = 3; + ggml_tensor * s = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, d_state, head_dim, n_head, n_seqs); + ggml_tensor * x = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, head_dim, n_head, n_seq_tokens, n_seqs); + ggml_tensor * dt = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_head, n_seq_tokens, n_seqs); + ggml_tensor * B = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, d_state, n_group, n_seq_tokens, n_seqs); + ggml_tensor * C = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, d_state, n_group, n_seq_tokens, n_seqs); + ggml_tensor * ids = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_seqs); + op_tensor = ggml_ssm_scan(ctx, s, x, dt, w, B, C, ids); } break; case GGML_OP_RWKV_WKV6: { @@ -469,6 +485,10 @@ void llama_model::load_hparams(llama_model_loader & ml) { std::fill(hparams.n_head_arr.begin(), hparams.n_head_arr.end(), 0); std::fill(hparams.n_head_kv_arr.begin(), hparams.n_head_kv_arr.end(), 0); std::fill(hparams.n_ff_arr.begin(), hparams.n_ff_arr.end(), 0); + std::fill( + hparams.recurrent_layer_arr.begin(), + hparams.recurrent_layer_arr.end(), 
+ llm_arch_is_recurrent(ml.get_arch())); std::fill(hparams.rope_sections.begin(), hparams.rope_sections.end(), 0); @@ -567,6 +587,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { case 22: type = LLM_TYPE_1B; break; case 26: type = LLM_TYPE_3B; break; case 28: type = LLM_TYPE_3B; break; // Llama 3.2 3B + case 30: type = LLM_TYPE_256M; break; // smoldocling 256M // granite uses a vocab with len 49152 case 32: type = n_vocab == 49152 ? LLM_TYPE_3B : (n_vocab < 40000 ? LLM_TYPE_7B : LLM_TYPE_8B); break; case 36: type = LLM_TYPE_8B; break; // granite @@ -598,6 +619,16 @@ void llama_model::load_hparams(llama_model_loader & ml) { hparams.use_kq_norm = false; } } break; + case LLM_ARCH_ARCEE: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + // Arcee uses the same structure as Llama + switch (hparams.n_layer) { + case 36: type = LLM_TYPE_4B; break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; case LLM_ARCH_DECI: { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); @@ -738,6 +769,16 @@ void llama_model::load_hparams(llama_model_loader & ml) { } } } break; + case LLM_ARCH_NEO_BERT: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn); + ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type); + + if (hparams.n_layer == 28) { + type = LLM_TYPE_250M; + } + } break; case LLM_ARCH_BLOOM: { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps); @@ -810,6 +851,21 @@ void llama_model::load_hparams(llama_model_loader & ml) { default: type = LLM_TYPE_UNKNOWN; } } break; + case LLM_ARCH_DREAM: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + // Dream models are primarily 7B with 28 layers + switch (hparams.n_layer) { + case 28: + type = LLM_TYPE_7B; + break; + default: + type = LLM_TYPE_UNKNOWN; + } + // Set non-causal attention for diffusion models + hparams.causal_attn = false; + } + break; 
case LLM_ARCH_QWEN2MOE: { ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp, false); @@ -896,6 +952,33 @@ void llama_model::load_hparams(llama_model_loader & ml) { default: type = LLM_TYPE_UNKNOWN; } } break; + case LLM_ARCH_PLAMO2: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + // Load Mamba SSM parameters + ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv); + ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner); + ml.get_key(LLM_KV_SSM_STATE_SIZE, hparams.ssm_d_state); + ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank); + ml.get_key(LLM_KV_SSM_GROUP_COUNT, hparams.ssm_n_group); + + for (uint32_t i = 0; i < hparams.n_layer; ++i) { + hparams.recurrent_layer_arr[i] = hparams.n_head_kv(i) == 0; + } + + switch (hparams.n_layer) { + case 16: type = LLM_TYPE_1B; break; + case 32: + if (hparams.n_embd == 2048) { + type = LLM_TYPE_2B; + } else if (hparams.n_embd == 4096) { + type = LLM_TYPE_8B; + } + break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; case LLM_ARCH_GPT2: { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps); @@ -991,6 +1074,24 @@ void llama_model::load_hparams(llama_model_loader & ml) { ? 
1.0f / std::sqrt(float(hparams.n_embd / hparams.n_head(0))) : 1.0f / std::sqrt(float(hparams.n_embd_head_k)); } break; + case LLM_ARCH_GEMMA3N: + { + hparams.swa_type = LLAMA_SWA_TYPE_STANDARD; + hparams.set_swa_pattern(5); + + hparams.rope_freq_base_train_swa = 10000.0f; + hparams.rope_freq_scale_train_swa = 1.0f; + hparams.f_attention_scale = 1.0f; + + ml.get_key(LLM_KV_ATTENTION_SLIDING_WINDOW, hparams.n_swa); + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + switch (hparams.n_layer) { + case 30: type = LLM_TYPE_E2B; break; + case 35: type = LLM_TYPE_E4B; break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; case LLM_ARCH_STARCODER2: { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps); @@ -1034,6 +1135,58 @@ void llama_model::load_hparams(llama_model_loader & ml) { default: type = LLM_TYPE_UNKNOWN; } } break; + case LLM_ARCH_MAMBA2: + { + ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv); + ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner); + ml.get_key(LLM_KV_SSM_STATE_SIZE, hparams.ssm_d_state); + ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank); + ml.get_key(LLM_KV_SSM_GROUP_COUNT, hparams.ssm_n_group); + + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + switch (hparams.n_layer) { + case 24: + switch (hparams.n_embd) { + case 768: type = LLM_TYPE_SMALL; break; + default: type = LLM_TYPE_UNKNOWN; + } break; + case 48: + switch (hparams.n_embd) { + case 1024: type = LLM_TYPE_MEDIUM; break; + case 1536: type = LLM_TYPE_LARGE; break; + case 2048: type = LLM_TYPE_XL; break; + default: type = LLM_TYPE_UNKNOWN; + } break; + case 64: + switch (hparams.n_embd) { + case 2560: type = LLM_TYPE_3B; break; + case 4096: type = LLM_TYPE_7B; break; + default: type = LLM_TYPE_UNKNOWN; + } break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; + case LLM_ARCH_JAMBA: + { + ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv); + ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner); 
+ ml.get_key(LLM_KV_SSM_STATE_SIZE, hparams.ssm_d_state); + ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank); + + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + for (uint32_t i = 0; i < hparams.n_layer; ++i) { + hparams.recurrent_layer_arr[i] = hparams.n_head_kv(i) == 0; + } + + switch (hparams.n_layer) { + // TODO: Jamba layers are a bit heterogenous, so naming this is hard. + case 12: // 900M 8x???M + case 32: // 51B 16x?B + default: type = LLM_TYPE_UNKNOWN; + } + } break; case LLM_ARCH_XVERSE: { ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); @@ -1400,6 +1553,11 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale); ml.get_key(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale); + // Granite uses rope_finetuned as a switch for rope, so default to true + bool rope_finetuned = true; + ml.get_key(LLM_KV_ROPE_SCALING_FINETUNED, rope_finetuned, false); + hparams.rope_finetuned = rope_finetuned; + switch (hparams.n_layer) { case 32: type = LLM_TYPE_3B; break; case 40: type = LLM_TYPE_3B; break; @@ -1407,6 +1565,40 @@ void llama_model::load_hparams(llama_model_loader & ml) { default: type = LLM_TYPE_UNKNOWN; } + // For Granite MoE Shared + ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, /* required */ false); + } break; + case LLM_ARCH_GRANITE_HYBRID: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale, /* required */ false); + ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale, /* required */ false); + ml.get_key(LLM_KV_EMBEDDING_SCALE, hparams.f_embedding_scale, /* required */ false); + ml.get_key(LLM_KV_ATTENTION_SCALE, hparams.f_attention_scale, /* required */ false); + + ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv); + ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner); + ml.get_key(LLM_KV_SSM_STATE_SIZE, 
hparams.ssm_d_state); + ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank); + ml.get_key(LLM_KV_SSM_GROUP_COUNT, hparams.ssm_n_group); + + // Granite uses rope_finetuned as a switch for rope, so default to true + bool rope_finetuned = true; + ml.get_key(LLM_KV_ROPE_SCALING_FINETUNED, rope_finetuned, false); + hparams.rope_finetuned = rope_finetuned; + + // A layer is recurrent IFF the n_head_kv value is set to 0 + for (uint32_t i = 0; i < hparams.n_layer; ++i) { + hparams.recurrent_layer_arr[i] = hparams.n_head_kv(i) == 0; + } + + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + switch (hparams.n_layer) { + // TODO: Add llm type label (not sure this is useful) + default: type = LLM_TYPE_UNKNOWN; + } + // For Granite MoE Shared ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, /* required */ false); } break; @@ -1444,6 +1636,104 @@ void llama_model::load_hparams(llama_model_loader & ml) { default: type = LLM_TYPE_UNKNOWN; } } break; + case LLM_ARCH_DOTS1: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead); + ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp); + ml.get_key(LLM_KV_EXPERT_SHARED_COUNT, hparams.n_expert_shared); + ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE, hparams.expert_weights_scale); + ml.get_key(LLM_KV_EXPERT_WEIGHTS_NORM, hparams.expert_weights_norm, false); + ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, false); + switch (hparams.n_layer) { + case 62: type = LLM_TYPE_142B; break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; + case LLM_ARCH_ERNIE4_5: + case LLM_ARCH_ERNIE4_5_MOE: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + if (arch == LLM_ARCH_ERNIE4_5_MOE) { + ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp); + ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp, false); + 
ml.get_key(LLM_KV_INTERLEAVE_MOE_LAYER_STEP, hparams.n_moe_layer_step); + ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead); + } + + switch (hparams.n_layer) { + case 18: type = LLM_TYPE_0_3B; break; + case 28: type = LLM_TYPE_21B_A3B; break; + case 54: type = LLM_TYPE_300B_A47B; break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; + case LLM_ARCH_FALCON_H1: + { + // Common parameters + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + + // SSM parameters + ml.get_key(LLM_KV_SSM_CONV_KERNEL, hparams.ssm_d_conv); + ml.get_key(LLM_KV_SSM_INNER_SIZE, hparams.ssm_d_inner); + ml.get_key(LLM_KV_SSM_STATE_SIZE, hparams.ssm_d_state); + ml.get_key(LLM_KV_SSM_TIME_STEP_RANK, hparams.ssm_dt_rank); + ml.get_key(LLM_KV_SSM_GROUP_COUNT, hparams.ssm_n_group); + + std::fill(hparams.recurrent_layer_arr.begin(), hparams.recurrent_layer_arr.end(), true); + + switch (hparams.n_layer) { + case 36: + type = LLM_TYPE_0_5B; break; + case 24: + type = LLM_TYPE_1_5B; break; + case 66: + type = LLM_TYPE_1B; break; + case 32: + type = LLM_TYPE_3B; break; + case 44: + type = LLM_TYPE_7B; break; + case 72: + type = LLM_TYPE_34B; break; + default: + type = LLM_TYPE_UNKNOWN; + } + } break; + case LLM_ARCH_HUNYUAN_MOE: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp); + ml.get_key(LLM_KV_EXPERT_SHARED_FEED_FORWARD_LENGTH, hparams.n_ff_shexp); + + switch (hparams.n_layer) { + case 32: type = LLM_TYPE_A13B; break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; + case LLM_ARCH_SMOLLM3: + { + ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + hparams.n_no_rope_layer_step = 4; + + switch (hparams.n_layer) { + case 36: type = LLM_TYPE_3B; break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; + case LLM_ARCH_LFM2: + { + ml.get_key(LLM_KV_SHORTCONV_L_CACHE, hparams.n_shortconv_l_cache); + 
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps); + for (uint32_t il = 0; il < hparams.n_layer; ++il) { + hparams.recurrent_layer_arr[il] = hparams.n_head_kv(il) == 0; + } + switch (hparams.n_embd) { + case 1024: type = LLM_TYPE_350M; break; + case 1536: type = LLM_TYPE_700M; break; + case 2048: type = LLM_TYPE_1_2B; break; + default: type = LLM_TYPE_UNKNOWN; + } + } break; default: throw std::runtime_error("unsupported model architecture"); } @@ -2187,6 +2477,32 @@ bool llama_model::load_tensors(llama_model_loader & ml) { layer.layer_out_norm_b = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM, "bias", i), {n_embd}, 0); } } break; + case LLM_ARCH_NEO_BERT: + { + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + cls = create_tensor(tn(LLM_TENSOR_CLS, "weight"), {n_embd, n_embd}, TENSOR_NOT_REQUIRED); + cls_b = create_tensor(tn(LLM_TENSOR_CLS, "bias"), {n_embd}, TENSOR_NOT_REQUIRED); + + cls_out = create_tensor(tn(LLM_TENSOR_CLS_OUT, "weight"), {n_embd, hparams.n_cls_out}, TENSOR_NOT_REQUIRED); + cls_out_b = create_tensor(tn(LLM_TENSOR_CLS_OUT, "bias"), {hparams.n_cls_out}, TENSOR_NOT_REQUIRED); + + output_norm_enc = create_tensor(tn(LLM_TENSOR_ENC_OUTPUT_NORM, "weight"), {n_embd}, 0); + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, n_embd + 2*n_embd_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); + + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff*2}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0); + } + } break; case LLM_ARCH_JINA_BERT_V2: { tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, 
n_vocab}, 0); // word_embeddings @@ -2381,12 +2697,14 @@ bool llama_model::load_tensors(llama_model_loader & ml) { } break; case LLM_ARCH_QWEN2: case LLM_ARCH_QWEN2VL: + case LLM_ARCH_DREAM: { tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); // output output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + output_b = create_tensor(tn(LLM_TENSOR_OUTPUT, "bias"), {n_vocab}, TENSOR_NOT_REQUIRED); // if output is NULL, init from the input tok embed if (output == NULL) { output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); @@ -2676,6 +2994,73 @@ bool llama_model::load_tensors(llama_model_loader & ml) { layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); } } break; + case LLM_ARCH_PLAMO2: + { + const uint32_t d_conv = hparams.ssm_d_conv; + const uint32_t d_state = hparams.ssm_d_state; + const uint32_t num_heads = hparams.ssm_dt_rank; + const uint32_t intermediate_size = hparams.ssm_d_inner; + const uint32_t head_dim = intermediate_size / num_heads; + const uint32_t qk_dim = head_dim; + const uint32_t v_dim = head_dim; + const int64_t num_attention_heads = hparams.n_head(); + const int64_t q_num_heads = num_attention_heads; + const int64_t dt_dim = std::max(64, int(hparams.n_embd / 16)); + + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + // if output is NULL, init from the input tok embed + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + bool is_mamba_layer = 
hparams.is_recurrent(i); + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + if (is_mamba_layer) { + layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), {n_embd, 2 * intermediate_size}, 0); + layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, intermediate_size}, 0); + + layer.ssm_x = create_tensor(tn(LLM_TENSOR_SSM_X, "weight", i), {intermediate_size, dt_dim + 2*d_state}, 0); + layer.ssm_dt = create_tensor(tn(LLM_TENSOR_SSM_DT, "weight", i), {dt_dim, num_heads}, 0); + layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {num_heads}, 0); + + layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {num_heads}, 0); + layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {num_heads}, 0); + + layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {intermediate_size, n_embd}, 0); + + layer.ssm_dt_norm = create_tensor(tn(LLM_TENSOR_SSM_DT_NORM, i), {dt_dim}, 0); + layer.ssm_b_norm = create_tensor(tn(LLM_TENSOR_SSM_B_NORM, i), {d_state}, 0); + layer.ssm_c_norm = create_tensor(tn(LLM_TENSOR_SSM_C_NORM, i), {d_state}, 0); + } else { + const int64_t num_key_value_heads = hparams.n_head_kv(i); + const int64_t k_num_heads = num_key_value_heads; + const int64_t v_num_heads = num_key_value_heads; + const int64_t q_proj_dim = q_num_heads * qk_dim; + const int64_t k_proj_dim = k_num_heads * qk_dim; + const int64_t v_proj_dim = v_num_heads * v_dim; + + layer.wqkv = create_tensor(tn(LLM_TENSOR_ATTN_QKV, "weight", i), {n_embd, q_proj_dim + k_proj_dim + v_proj_dim}, 0); + layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {head_dim, num_attention_heads}, 0); + layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {head_dim, k_num_heads}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {q_num_heads * v_dim, n_embd}, 0); + } + + // All layers have post-attention norm, FFN norm, and FFN tensors + layer.attn_post_norm = 
create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, i), {n_embd}, 0); + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff * 2}, 0); + layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, i), {n_embd}, 0); + } + } break; case LLM_ARCH_GPT2: { tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); @@ -2884,13 +3269,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) { layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0); } } break; - case LLM_ARCH_STARCODER2: + case LLM_ARCH_GEMMA3N: { - tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); - - // output - output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); - output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, 0); + const int64_t n_altup = hparams.n_altup; + const int64_t laurel_rank = hparams.laurel_rank; + const int64_t n_embd_altup = hparams.n_embd_altup; output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); // if output is NULL, init from the input tok embed @@ -2898,31 +3281,89 @@ bool llama_model::load_tensors(llama_model_loader & ml) { output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); } - for (int i = 0; i < n_layer; ++i) { - auto & layer = layers[i]; + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + tok_embd_per_layer = create_tensor(tn(LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "weight"), {n_embd_altup * n_layer, n_vocab}, 0); - layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); - layer.attn_norm_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "bias", i), {n_embd}, 0); + altup_proj = 
create_tensor(tn(LLM_TENSOR_ALTUP_PROJ, "weight"), {n_embd, n_embd, n_altup - 1}, 0); + altup_unembd_proj = create_tensor(tn(LLM_TENSOR_ALTUP_UNEMBD_PROJ, "weight"), {n_embd, n_embd, n_altup - 1}, 0); + per_layer_model_proj = create_tensor(tn(LLM_TENSOR_PER_LAYER_MODEL_PROJ, "weight"), {n_embd, n_embd_altup * n_layer}, 0); + per_layer_proj_norm = create_tensor(tn(LLM_TENSOR_PER_LAYER_PROJ_NORM, "weight"), {n_embd_altup}, 0); - layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0); - layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}, 0); - layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0); - layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); - // optional bias tensors - layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, 0); - layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, 0); - layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, 0); - layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, 0); + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; - layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); - layer.ffn_norm_b = create_tensor(tn(LLM_TENSOR_FFN_NORM, "bias", i), {n_embd}, 0); + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); - layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); - layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0); + layer.wo = 
create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0); - // optional bias tensors - layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0); + layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd_head_k}, 0); + layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd_head_k}, 0); + layer.attn_post_norm = create_tensor(tn(LLM_TENSOR_ATTN_POST_NORM, "weight", i), {n_embd}, 0); + + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_post_norm = create_tensor(tn(LLM_TENSOR_FFN_POST_NORM, "weight", i), {n_embd}, 0); + + // altup & laurel + layer.per_layer_inp_gate = create_tensor(tn(LLM_TENSOR_PER_LAYER_INP_GATE, "weight", i), {n_embd, n_embd_altup}, 0); + layer.per_layer_proj = create_tensor(tn(LLM_TENSOR_PER_LAYER_PROJ, "weight", i), {n_embd_altup, n_embd}, 0); + layer.per_layer_post_norm = create_tensor(tn(LLM_TENSOR_PER_LAYER_POST_NORM, "weight", i), {n_embd}, 0); + layer.altup_correct_coef = create_tensor(tn(LLM_TENSOR_ALTUP_CORRECT_COEF, "weight", i), {n_altup, n_altup}, 0); + layer.altup_correct_scale = create_tensor(tn(LLM_TENSOR_ALTUP_CORRECT_SCALE, "weight", i), {n_embd}, 0); + layer.altup_predict_coef = create_tensor(tn(LLM_TENSOR_ALTUP_PREDICT_COEF, "weight", i), {n_altup, n_altup * n_altup}, 0); + layer.altup_router = create_tensor(tn(LLM_TENSOR_ALTUP_ROUTER, "weight", i), {n_embd, n_altup}, 0); + layer.altup_router_norm = create_tensor(tn(LLM_TENSOR_ALTUP_ROUTER_NORM, "weight", i), {n_embd}, 0); + layer.laurel_l = create_tensor(tn(LLM_TENSOR_LAUREL_L, "weight", i), {n_embd, laurel_rank}, 0); + layer.laurel_r = 
create_tensor(tn(LLM_TENSOR_LAUREL_R, "weight", i), {laurel_rank, n_embd}, 0); + layer.laurel_post_norm = create_tensor(tn(LLM_TENSOR_LAUREL_POST_NORM, "weight", i), {n_embd}, 0); + } + } break; + case LLM_ARCH_STARCODER2: + { + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output_norm_b = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, 0); + + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + // if output is NULL, init from the input tok embed + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + layer.attn_norm_b = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "bias", i), {n_embd}, 0); + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); + + // optional bias tensors + layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, 0); + layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, 0); + layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, 0); + layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, 0); + + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + layer.ffn_norm_b = create_tensor(tn(LLM_TENSOR_FFN_NORM, "bias", i), {n_embd}, 0); + + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = 
create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + + // optional bias tensors + layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, 0); layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP , "bias", i), { n_ff}, 0); } } break; @@ -2973,6 +3414,228 @@ bool llama_model::load_tensors(llama_model_loader & ml) { layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {d_inner, n_embd}, 0); } } break; + case LLM_ARCH_MAMBA2: + { + const int64_t d_conv = hparams.ssm_d_conv; + const int64_t d_inner = hparams.ssm_d_inner; + const int64_t d_state = hparams.ssm_d_state; + const int64_t n_head = hparams.ssm_dt_rank; + const int64_t n_group = hparams.ssm_n_group; + const int64_t d_in_proj = 2*d_inner + 2*n_group*d_state + n_head; + + // only an expansion factor of 2 is supported for now + GGML_ASSERT(2 * n_embd == d_inner); + + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + { + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + // if output is NULL, init from the input tok embed, duplicated to allow offloading + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + // norm + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), {n_embd, d_in_proj}, 0); + + layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, d_inner + 2*n_group*d_state}, 0); + layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {d_inner + 2*n_group*d_state}, 0); + + layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {n_head}, 0); + + // no "weight" suffix for these 
+ layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, n_head}, 0); + layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, n_head}, 0); + + layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {d_inner / n_group, n_group}, 0); + + // out_proj + layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {d_inner, n_embd}, 0); + } + } break; + case LLM_ARCH_JAMBA: + { + const int64_t d_conv = hparams.ssm_d_conv; + const int64_t d_inner = hparams.ssm_d_inner; + const int64_t d_state = hparams.ssm_d_state; + const int64_t dt_rank = hparams.ssm_dt_rank; + + // only an expansion factor of 2 is supported for now + GGML_ASSERT(2 * n_embd == d_inner); + + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + { + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + // if output is NULL, init from the input tok embed, duplicated to allow offloading + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + } + + for (int i = 0; i < n_layer; ++i) { + const int64_t n_head_kv = hparams.n_head_kv(i); + const int64_t n_embd_gqa = hparams.n_embd_v_gqa(i); + + auto & layer = layers[i]; + + // norm + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + if (n_head_kv == 0) { + // Mamba layer + layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), {n_embd, 2*d_inner}, 0); + + layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, d_inner}, 0); + layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {d_inner}, 0); + + layer.ssm_x = create_tensor(tn(LLM_TENSOR_SSM_X, "weight", i), {d_inner, dt_rank + 2*d_state}, 0); + + layer.ssm_dt_norm = create_tensor(tn(LLM_TENSOR_SSM_DT_NORM, "weight", i), {dt_rank}, 0); + + 
layer.ssm_dt = create_tensor(tn(LLM_TENSOR_SSM_DT, "weight", i), {dt_rank, d_inner}, 0); + layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {d_inner}, 0); + + layer.ssm_b_norm = create_tensor(tn(LLM_TENSOR_SSM_B_NORM, "weight", i), {d_state}, 0); + layer.ssm_c_norm = create_tensor(tn(LLM_TENSOR_SSM_C_NORM, "weight", i), {d_state}, 0); + + // no "weight" suffix for these + layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {d_state, d_inner}, 0); + layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {d_inner}, 0); + + // out_proj + layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {d_inner, n_embd}, 0); + } else { + // Attention layers + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); + } + + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + + layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, TENSOR_NOT_REQUIRED); + + if (layer.ffn_gate_inp) { + // MoE + layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff, n_expert}, 0); + layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff, n_embd, n_expert}, 0); + layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff, n_expert}, 0); + } else { + // FFN (no MoE) + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + } + } + } break; + case LLM_ARCH_GRANITE_HYBRID: + { + // mamba2 Mixer SSM params + // 
NOTE: int64_t for tensor dimensions + const int64_t d_conv = hparams.ssm_d_conv; + const int64_t d_inner = hparams.ssm_d_inner; + const int64_t d_state = hparams.ssm_d_state; + const int64_t n_ssm_head = hparams.ssm_dt_rank; + const int64_t n_group = hparams.ssm_n_group; + const int64_t d_in_proj = 2*d_inner + 2*n_group*d_state + n_ssm_head; + + // only an expansion factor of 2 is supported for now + GGML_ASSERT(2 * n_embd == d_inner); + + // embeddings + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + { + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + // if output is NULL, init from the input tok embed, duplicated to allow offloading + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + // norm + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + if (hparams.is_recurrent(i)) { + // ssm layers + layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), {n_embd, d_in_proj}, 0); + + layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {d_conv, d_inner + 2*n_group*d_state}, 0); + layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {d_inner + 2*n_group*d_state}, TENSOR_NOT_REQUIRED); + + layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {n_ssm_head}, 0); + + // no "weight" suffix for these + layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, n_ssm_head}, 0); + layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, n_ssm_head}, 0); + + layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {d_inner / n_group, n_group}, 0); + + // out_proj + layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {d_inner, 
n_embd}, 0); + } else { + // attention layers (with optional bias) + const int64_t n_head_i = hparams.n_head(i); + const int64_t n_embd_k_gqa_i = hparams.n_embd_k_gqa(i); + const int64_t n_embd_v_gqa_i = hparams.n_embd_v_gqa(i); + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head_i}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa_i}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa_i}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head_i, n_embd}, 0); + layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED); + layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_k_gqa_i}, TENSOR_NOT_REQUIRED); + layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_v_gqa_i}, TENSOR_NOT_REQUIRED); + layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED); + } + + // feed forward (w/ optional biases) + if (n_expert > 0) { + // MoE FFN + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + layer.rope_freqs = create_tensor(tn(LLM_TENSOR_ROPE_FREQS, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? 
TENSOR_DUPLICATED : 0)); + layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0); + layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff, n_expert}, TENSOR_NOT_REQUIRED); + layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff, n_embd, n_expert}, 0); + layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff, n_expert}, 0); + + // For Granite MoE Shared + if (hparams.n_ff_shexp > 0) { + layer.ffn_gate_shexp = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "weight", i), {n_embd, hparams.n_ff_shexp}, 0); + layer.ffn_up_shexp = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), {n_embd, hparams.n_ff_shexp}, 0); + layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), {hparams.n_ff_shexp, n_embd}, 0); + } + } else { + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + layer.rope_freqs = create_tensor(tn(LLM_TENSOR_ROPE_FREQS, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? 
TENSOR_DUPLICATED : 0)); + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_gate_b = create_tensor(tn(LLM_TENSOR_FFN_GATE, "bias", i), {n_ff}, TENSOR_NOT_REQUIRED); + layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED); + layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff}, TENSOR_NOT_REQUIRED); + } + } + } break; case LLM_ARCH_XVERSE: { tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); @@ -4123,59 +4786,371 @@ bool llama_model::load_tensors(llama_model_loader & ml) { layer.ffn_up_shexp = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0); } } break; - default: - throw std::runtime_error("unknown architecture"); - } + case LLM_ARCH_DOTS1: + { + const int64_t n_ff_exp = hparams.n_ff_exp; + const int64_t n_expert_shared = hparams.n_expert_shared; - if (n_moved_tensors > 0) { - LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %d others) cannot be used with preferred buffer type %s, using %s instead\n", - __func__, first_moved_tensor->name, ggml_type_name(first_moved_tensor->type), n_moved_tensors - 1, - ggml_backend_buft_name(first_moved_from_buft), ggml_backend_buft_name(first_moved_to_buft)); - } - } + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); - ml.done_getting_tensors(); + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, 0); - ml.init_mappings(true, use_mlock ? 
&pimpl->mlock_mmaps : nullptr); - pimpl->mappings.reserve(ml.mappings.size()); + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; - // create the backend buffers - std::vector> ctx_bufs; - ctx_bufs.reserve(ctx_map.size()); + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); - // Ensure we have enough capacity for the maximum backend buffer we will potentially create - const size_t n_max_backend_buffer = ctx_map.size() * ml.files.size(); - pimpl->bufs.reserve(n_max_backend_buffer); + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0); - for (auto & it : ctx_map) { - ggml_backend_buffer_type_t buft = it.first; - ggml_context * ctx = it.second; + layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd_head_k}, 0); + layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd_head_k}, 0); - // skip contexts without tensors - if (ggml_get_first_tensor(ctx) == nullptr) { - continue; - } + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); - llama_buf_map buf_map; - buf_map.reserve(n_max_backend_buffer); + if (i < (int) hparams.n_layer_dense_lead) { + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + } else { + layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0); + layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, 
"bias", i), {n_expert}, TENSOR_NOT_REQUIRED); - // check if it is possible to use buffer_from_host_ptr with this buffer type - ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft); - if (!dev) { - // FIXME: workaround for CPU backend buft having a NULL device - dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); - if (!dev) { - throw std::runtime_error(format("%s: no CPU backend found", __func__)); - } - } - ggml_backend_dev_props props; - ggml_backend_dev_get_props(dev, &props); - bool buffer_from_host_ptr_supported = props.caps.buffer_from_host_ptr; - bool is_default_buft = buft == ggml_backend_dev_buffer_type(dev); + if (n_expert == 0) { + throw std::runtime_error("n_expert must be > 0"); + } + if (n_expert_used == 0) { + throw std::runtime_error("n_expert_used must be > 0"); + } - if (ml.use_mmap && use_mmap_buffer && buffer_from_host_ptr_supported && is_default_buft) { - for (uint32_t idx = 0; idx < ml.files.size(); idx++) { - // only the mmap region containing the tensors in the model is mapped to the backend buffer + // MoE branch + layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert}, 0); + layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff_exp, n_embd, n_expert}, 0); + layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), { n_embd, n_ff_exp, n_expert}, 0); + + // Shared expert branch + layer.ffn_gate_shexp = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0); + layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), { n_ff_exp * n_expert_shared, n_embd}, 0); + layer.ffn_up_shexp = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0); + } + } + } break; + case LLM_ARCH_ARCEE: + { + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + output_norm = 
create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + + // if output is NULL, init from the input tok embed + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0); + + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + + layer.rope_freqs = create_tensor(tn(LLM_TENSOR_ROPE_FREQS, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? 
TENSOR_DUPLICATED : 0)); + + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + } + } break; + case LLM_ARCH_ERNIE4_5: + case LLM_ARCH_ERNIE4_5_MOE: + { + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + // if output is NULL, init from the input tok embed + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0); + + // optional bias tensors + layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED); + layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED); + layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {n_embd_gqa}, TENSOR_NOT_REQUIRED); + layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {n_embd}, TENSOR_NOT_REQUIRED); + + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + + if (arch == LLM_ARCH_ERNIE4_5_MOE && static_cast(i) >= hparams.n_layer_dense_lead) { // MoE layers + int n_ff_exp = hparams.n_ff_exp; + + layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", 
i), {n_embd, n_expert}, 0); + layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias", i), {n_expert}, TENSOR_NOT_REQUIRED); + layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, TENSOR_NOT_REQUIRED); + layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff_exp, n_embd, n_expert}, 0); + layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff_exp, n_expert}, 0); + + // Shared expert (if present) + if (hparams.n_ff_shexp > 0) { + layer.ffn_gate_shexp = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "weight", i), { n_embd, hparams.n_ff_shexp}, 0); + layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), {hparams.n_ff_shexp, n_embd }, 0); + layer.ffn_up_shexp = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), { n_embd, hparams.n_ff_shexp}, 0); + } + } else { // Dense layers + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + } + } + } break; + case LLM_ARCH_FALCON_H1: + { + // Common + const int64_t hidden_size = hparams.n_embd; // hidden_size + + // mamba2 Mixer SSM params + const int64_t ssm_conv_kernel_size = hparams.ssm_d_conv; // ssm_conv_kernel_size + const int64_t ssm_n_groups = hparams.ssm_n_group; // ssm_n_groups + const int64_t ssm_state_size = hparams.ssm_d_state; // ssm_state_size + const int64_t ssm_intermediate_size = hparams.ssm_d_inner; // TODO expand + const int64_t ssm_num_heads = hparams.ssm_dt_rank; // ssm_num_heads + const int64_t ssm_conv_dim = ssm_intermediate_size + 2 * ssm_n_groups * ssm_state_size; + const int64_t ssm_projection_size = ssm_intermediate_size + ssm_conv_dim + ssm_num_heads; + + // attn params + const int64_t attn_num_attention_head = 
hparams.n_head(0); // rename to: attn_num_attention_head + const int64_t attn_num_key_value_head = hparams.n_head_kv(0); + + // ffn params + const int64_t ffn_intermediate_size = hparams.n_ff(0); + + // embeddings + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hidden_size, n_vocab}, 0); + + // output + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {hidden_size, n_vocab}, TENSOR_NOT_REQUIRED); + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {hidden_size}, 0); + + // if output is NULL, init from the input tok embed + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {hidden_size, n_vocab}, TENSOR_DUPLICATED); + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + /*SSM LAYERS*/ + // ssm in + layer.ssm_in = create_tensor(tn(LLM_TENSOR_SSM_IN, "weight", i), {hidden_size, ssm_projection_size}, 0); + // ssm 1d conv + layer.ssm_conv1d = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "weight", i), {ssm_conv_kernel_size, ssm_conv_dim}, 0); + layer.ssm_conv1d_b = create_tensor(tn(LLM_TENSOR_SSM_CONV1D, "bias", i), {ssm_conv_dim}, TENSOR_NOT_REQUIRED); + // ssm_dt + layer.ssm_dt_b = create_tensor(tn(LLM_TENSOR_SSM_DT, "bias", i), {ssm_num_heads}, 0); + // no "weight" suffix for these + layer.ssm_a = create_tensor(tn(LLM_TENSOR_SSM_A, i), {1, ssm_num_heads}, 0); + layer.ssm_d = create_tensor(tn(LLM_TENSOR_SSM_D, i), {1, ssm_num_heads}, 0); + // ssm_norm + layer.ssm_norm = create_tensor(tn(LLM_TENSOR_SSM_NORM, "weight", i), {ssm_intermediate_size / ssm_n_groups, ssm_n_groups}, TENSOR_NOT_REQUIRED); + // out_proj + layer.ssm_out = create_tensor(tn(LLM_TENSOR_SSM_OUT, "weight", i), {ssm_intermediate_size, hidden_size}, 0); + + /*ATTENTION LAYERS*/ + // attention layers (with optional bias) + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {hidden_size, n_embd_head_k * attn_num_attention_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {hidden_size, 
attn_num_key_value_head * n_embd_head_k}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {hidden_size, attn_num_key_value_head * n_embd_head_v}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * attn_num_attention_head, hidden_size}, 0); + layer.bq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "bias", i), {hidden_size}, TENSOR_NOT_REQUIRED); + layer.bk = create_tensor(tn(LLM_TENSOR_ATTN_K, "bias", i), {attn_num_key_value_head * n_embd_head_k}, TENSOR_NOT_REQUIRED); + layer.bv = create_tensor(tn(LLM_TENSOR_ATTN_V, "bias", i), {attn_num_key_value_head * n_embd_head_v}, TENSOR_NOT_REQUIRED); + layer.bo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "bias", i), {hidden_size}, TENSOR_NOT_REQUIRED); + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {hidden_size}, 0); + + + // feed forward (w/ optional biases) + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, i), {hidden_size}, 0); + layer.rope_freqs = create_tensor(tn(LLM_TENSOR_ROPE_FREQS, "weight", i), {n_rot/2}, TENSOR_NOT_REQUIRED | (i != 0 ? 
TENSOR_DUPLICATED : 0)); + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {hidden_size, ffn_intermediate_size}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { ffn_intermediate_size, hidden_size}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {hidden_size, ffn_intermediate_size}, 0); + + layer.ffn_gate_b = create_tensor(tn(LLM_TENSOR_FFN_GATE, "bias", i), {ffn_intermediate_size}, TENSOR_NOT_REQUIRED); + layer.ffn_down_b = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "bias", i), {hidden_size}, TENSOR_NOT_REQUIRED); + layer.ffn_up_b = create_tensor(tn(LLM_TENSOR_FFN_UP, "bias", i), {ffn_intermediate_size}, TENSOR_NOT_REQUIRED); + } + } break; + case LLM_ARCH_HUNYUAN_MOE: + { + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + // if output is NULL, init from the input tok embed + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0); + + layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd_head_k}, 0); + layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd_head_k}, 0); + + layer.ffn_norm = 
create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + + layer.ffn_gate_inp = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP, "weight", i), {n_embd, n_expert}, 0); + layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {n_embd, n_ff, n_expert}, 0); + layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), { n_ff, n_embd, n_expert}, 0); + layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff, n_expert}, 0); + + layer.ffn_gate_shexp = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "weight", i), {n_embd, hparams.n_ff_shexp}, 0); + layer.ffn_up_shexp = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP, "weight", i), {n_embd, hparams.n_ff_shexp}, 0); + layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), {hparams.n_ff_shexp, n_embd}, 0); + } + } break; + case LLM_ARCH_SMOLLM3: + { + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + + // output + output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0); + output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED); + + // if output is NULL, init from the input tok embed + if (output == NULL) { + output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED); + } + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_head_k * n_head}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_k_gqa}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_v_gqa}, 0); + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd_head_k * n_head, n_embd}, 0); + + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + 
layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + } + } break; + case LLM_ARCH_LFM2: + { + tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0); + + for (int i = 0; i < n_layer; ++i) { + auto & layer = layers[i]; + // ffn is same for transformer and conv layers + layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0); + layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0); + layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), { n_ff, n_embd}, 0); + layer.ffn_up = create_tensor(tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff}, 0); + + // for operator_norm + layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0); + + if (!hparams.is_recurrent(i)) { + layer.attn_q_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), {n_embd_head_k}, 0); + layer.attn_k_norm = create_tensor(tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), {n_embd_head_k}, 0); + GGML_ASSERT(n_embd_v_gqa == n_embd_k_gqa); + + layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0); + layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, hparams.n_embd_k_gqa(i)}, 0); + layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, hparams.n_embd_v_gqa(i)}, 0); + + layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0); + } else { + layer.shortconv.conv = create_tensor(tn(LLM_TENSOR_SHORTCONV_CONV, "weight", i), {hparams.n_shortconv_l_cache, n_embd}, 0); + layer.shortconv.in_proj = create_tensor(tn(LLM_TENSOR_SHORTCONV_INPROJ, "weight", i), {n_embd, 3 * n_embd}, 0); + layer.shortconv.out_proj = 
create_tensor(tn(LLM_TENSOR_SHORTCONV_OUTPROJ, "weight", i), {n_embd, n_embd}, 0); + } + } + } break; + default: + throw std::runtime_error("unknown architecture"); + } + + if (n_moved_tensors > 0) { + LLAMA_LOG_DEBUG("%s: tensor '%s' (%s) (and %d others) cannot be used with preferred buffer type %s, using %s instead\n", + __func__, first_moved_tensor->name, ggml_type_name(first_moved_tensor->type), n_moved_tensors - 1, + ggml_backend_buft_name(first_moved_from_buft), ggml_backend_buft_name(first_moved_to_buft)); + } + } + + ml.done_getting_tensors(); + + ml.init_mappings(true, use_mlock ? &pimpl->mlock_mmaps : nullptr); + pimpl->mappings.reserve(ml.mappings.size()); + + // create the backend buffers + std::vector<std::pair<ggml_context *, llama_buf_map>> ctx_bufs; + ctx_bufs.reserve(ctx_map.size()); + + // Ensure we have enough capacity for the maximum backend buffer we will potentially create + const size_t n_max_backend_buffer = ctx_map.size() * ml.files.size(); + pimpl->bufs.reserve(n_max_backend_buffer); + + for (auto & it : ctx_map) { + ggml_backend_buffer_type_t buft = it.first; + ggml_context * ctx = it.second; + + // skip contexts without tensors + if (ggml_get_first_tensor(ctx) == nullptr) { + continue; + } + + llama_buf_map buf_map; + buf_map.reserve(n_max_backend_buffer); + + // check if it is possible to use buffer_from_host_ptr with this buffer type + ggml_backend_dev_t dev = ggml_backend_buft_get_device(buft); + if (!dev) { + // FIXME: workaround for CPU backend buft having a NULL device + dev = ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU); + if (!dev) { + throw std::runtime_error(format("%s: no CPU backend found", __func__)); + } + } + ggml_backend_dev_props props; + ggml_backend_dev_get_props(dev, &props); + bool buffer_from_host_ptr_supported = props.caps.buffer_from_host_ptr; + bool is_default_buft = buft == ggml_backend_dev_buffer_type(dev); + + if (ml.use_mmap && use_mmap_buffer && buffer_from_host_ptr_supported && is_default_buft) { + for (uint32_t idx = 0; idx < 
ml.files.size(); idx++) { + // only the mmap region containing the tensors in the model is mapped to the backend buffer // this is important for metal with apple silicon: if the entire model could be mapped to a metal buffer, then we could just use metal for all layers // this allows using partial offloading when the model size exceeds the metal buffer size, but not the RAM size void * addr = nullptr; @@ -4362,12 +5337,6 @@ void llama_model::print_info() const { LLAMA_LOG_INFO("%s: freq_scale_train = %g\n", __func__, hparams.rope_freq_scale_train); LLAMA_LOG_INFO("%s: n_ctx_orig_yarn = %u\n", __func__, hparams.n_ctx_orig_yarn); LLAMA_LOG_INFO("%s: rope_finetuned = %s\n", __func__, hparams.rope_finetuned ? "yes" : "unknown"); - LLAMA_LOG_INFO("%s: ssm_d_conv = %u\n", __func__, hparams.ssm_d_conv); - LLAMA_LOG_INFO("%s: ssm_d_inner = %u\n", __func__, hparams.ssm_d_inner); - LLAMA_LOG_INFO("%s: ssm_d_state = %u\n", __func__, hparams.ssm_d_state); - LLAMA_LOG_INFO("%s: ssm_dt_rank = %u\n", __func__, hparams.ssm_dt_rank); - LLAMA_LOG_INFO("%s: ssm_dt_b_c_rms = %d\n", __func__, hparams.ssm_dt_b_c_rms); - if (!classifier_labels.empty()) { LLAMA_LOG_INFO("%s: n_cls_out = %u\n", __func__, hparams.n_cls_out); @@ -4378,6 +5347,20 @@ void llama_model::print_info() const { } } + if (arch == LLM_ARCH_MAMBA || + arch == LLM_ARCH_MAMBA2 || + arch == LLM_ARCH_JAMBA || + arch == LLM_ARCH_FALCON_H1 || + arch == LLM_ARCH_PLAMO2 || + arch == LLM_ARCH_GRANITE_HYBRID) { + LLAMA_LOG_INFO("%s: ssm_d_conv = %u\n", __func__, hparams.ssm_d_conv); + LLAMA_LOG_INFO("%s: ssm_d_inner = %u\n", __func__, hparams.ssm_d_inner); + LLAMA_LOG_INFO("%s: ssm_d_state = %u\n", __func__, hparams.ssm_d_state); + LLAMA_LOG_INFO("%s: ssm_dt_rank = %u\n", __func__, hparams.ssm_dt_rank); + LLAMA_LOG_INFO("%s: ssm_n_group = %u\n", __func__, hparams.ssm_n_group); + LLAMA_LOG_INFO("%s: ssm_dt_b_c_rms = %d\n", __func__, hparams.ssm_dt_b_c_rms); + } + LLAMA_LOG_INFO("%s: model type = %s\n", __func__, 
type_name().c_str()); if (pimpl->n_elements >= 1e12) { LLAMA_LOG_INFO("%s: model params = %.2f T\n", __func__, pimpl->n_elements*1e-12); @@ -4424,7 +5407,8 @@ void llama_model::print_info() const { if (arch == LLM_ARCH_MINICPM || arch == LLM_ARCH_GRANITE || - arch == LLM_ARCH_GRANITE_MOE) { + arch == LLM_ARCH_GRANITE_MOE || + arch == LLM_ARCH_GRANITE_HYBRID) { LLAMA_LOG_INFO("%s: f_embedding_scale = %f\n", __func__, hparams.f_embedding_scale); LLAMA_LOG_INFO("%s: f_residual_scale = %f\n", __func__, hparams.f_residual_scale); LLAMA_LOG_INFO("%s: f_attention_scale = %f\n", __func__, hparams.f_attention_scale); @@ -4558,6 +5542,8 @@ struct llm_build_llama : public llm_graph_context { const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -4620,9 +5606,7 @@ struct llm_build_llama : public llm_graph_context { cb(cur, "attn_out", il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -4718,6 +5702,8 @@ struct llm_build_llama_iswa : public llm_graph_context { const float kq_scale = hparams.f_attention_scale == 0.0f ? 
1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -4794,9 +5780,7 @@ struct llm_build_llama_iswa : public llm_graph_context { cb(cur, "attn_out", il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -4896,6 +5880,9 @@ struct llm_build_deci : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; const int64_t n_head_kv = hparams.n_head_kv(il); @@ -4969,9 +5956,7 @@ struct llm_build_deci : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -5050,6 +6035,8 @@ struct llm_build_baichuan : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -5101,9 +6088,7 @@ struct llm_build_baichuan : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); 
} @@ -5172,6 +6157,8 @@ struct llm_build_xverse : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -5216,9 +6203,7 @@ struct llm_build_xverse : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -5286,6 +6271,8 @@ struct llm_build_falcon : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * attn_norm; @@ -5311,12 +6298,10 @@ struct llm_build_falcon : public llm_graph_context { cur = build_lora_mm(model.layers[il].wqkv, cur); cb(cur, "wqkv", il); - ggml_tensor * Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); // using mode = 2 for neox mode @@ -5341,9 +6326,7 @@ struct 
llm_build_falcon : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); attn_norm = ggml_get_rows(ctx0, attn_norm, inp_out_ids); @@ -5412,6 +6395,8 @@ struct llm_build_grok : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -5471,9 +6456,7 @@ struct llm_build_grok : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -5572,6 +6555,8 @@ struct llm_build_dbrx : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -5593,12 +6578,10 @@ struct llm_build_dbrx : public llm_graph_context { cur = ggml_clamp(ctx0, cur, -hparams.f_clamp_kqv, hparams.f_clamp_kqv); cb(cur, "wqkv_clamped", il); - Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, 
cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); Qcur = ggml_rope_ext( @@ -5622,9 +6605,7 @@ struct llm_build_dbrx : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -5704,6 +6685,8 @@ struct llm_build_starcoder : public llm_graph_context { inpL = ggml_add(ctx0, inpL, pos); cb(inpL, "inpL", -1); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { cur = build_norm(inpL, model.layers[il].attn_norm, @@ -5736,9 +6719,7 @@ struct llm_build_starcoder : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -5803,6 +6784,8 @@ struct llm_build_refact : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -5835,9 +6818,7 @@ struct llm_build_refact : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, 
inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -5923,78 +6904,79 @@ struct llm_build_bert : public llm_graph_context { auto * inp_attn = build_attn_inp_no_cache(); - // iterate layers + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * cur = inpL; - ggml_tensor * Qcur; - ggml_tensor * Kcur; - ggml_tensor * Vcur; + { + ggml_tensor * Qcur; + ggml_tensor * Kcur; + ggml_tensor * Vcur; - // self-attention - if (model.layers[il].wqkv) { - cur = build_lora_mm(model.layers[il].wqkv, cur); - cb(cur, "wqkv", il); + // self-attention + if (model.layers[il].wqkv) { + cur = build_lora_mm(model.layers[il].wqkv, cur); + cb(cur, "wqkv", il); - if (model.layers[il].bqkv) { - cur = ggml_add(ctx0, cur, model.layers[il].bqkv); - cb(cur, "bqkv", il); - } + if (model.layers[il].bqkv) { + cur = ggml_add(ctx0, cur, model.layers[il].bqkv); + cb(cur, "bqkv", il); + } - Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); - Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); - } else { - Qcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wq, cur), model.layers[il].bq); - Kcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wk, cur), model.layers[il].bk); - Vcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wv, cur), model.layers[il].bv); - } + Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); + Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); + } else { + Qcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wq, cur), model.layers[il].bq); + Kcur = 
ggml_add(ctx0, build_lora_mm(model.layers[il].wk, cur), model.layers[il].bk); + Vcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wv, cur), model.layers[il].bv); + } - if (model.layers[il].attn_q_norm) { - Qcur = build_norm(Qcur, - model.layers[il].attn_q_norm, - model.layers[il].attn_q_norm_b, - LLM_NORM, il); - } + if (model.layers[il].attn_q_norm) { + Qcur = build_norm(Qcur, + model.layers[il].attn_q_norm, + model.layers[il].attn_q_norm_b, + LLM_NORM, il); + } - if (model.layers[il].attn_k_norm) { - Kcur = build_norm(Kcur, - model.layers[il].attn_k_norm, - model.layers[il].attn_k_norm_b, - LLM_NORM, il); - } + if (model.layers[il].attn_k_norm) { + Kcur = build_norm(Kcur, + model.layers[il].attn_k_norm, + model.layers[il].attn_k_norm_b, + LLM_NORM, il); + } - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); - Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); - // RoPE - if (model.arch == LLM_ARCH_NOMIC_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE) { - Qcur = ggml_rope_ext( - ctx0, Qcur, inp_pos, nullptr, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); + // RoPE + if (model.arch == LLM_ARCH_NOMIC_BERT || model.arch == LLM_ARCH_NOMIC_BERT_MOE) { + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); - Kcur = ggml_rope_ext( - ctx0, Kcur, inp_pos, nullptr, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); - } + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + 
ext_factor, attn_factor, beta_fast, beta_slow + ); + } - cb(Qcur, "Qcur", il); - cb(Kcur, "Kcur", il); - cb(Vcur, "Vcur", il); + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); - cur = build_attn(inp_attn, gf, - model.layers[il].wo, model.layers[il].bo, - Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); - cb(cur, "kqv_out", il); + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); + cb(cur, "kqv_out", il); + } - if (il == n_layer - 1 && pooling_type == LLAMA_POOLING_TYPE_NONE) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -6074,6 +7056,116 @@ struct llm_build_bert : public llm_graph_context { } }; +struct llm_build_neo_bert : public llm_graph_context { + llm_build_neo_bert(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + const int64_t n_embd_gqa = hparams.n_embd_v_gqa(); + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + + ggml_tensor * cur; + ggml_tensor * inpL; + ggml_tensor * inp_pos = build_inp_pos(); + + // construct input embeddings (token, type, position) + inpL = build_inp_embd(model.tok_embd); + cb(inpL, "inp_embd", -1); + + auto * inp_attn = build_attn_inp_no_cache(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * cur = inpL; + + // pre-norm + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + + { + ggml_tensor * Qcur; + ggml_tensor * Kcur; + ggml_tensor * Vcur; + + // self-attention + cur = build_lora_mm(model.layers[il].wqkv, cur); + cb(cur, "wqkv", il); + + Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, 
n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); + Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); + + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + // RoPE + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = build_attn(inp_attn, gf, + model.layers[il].wo, nullptr, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); + cb(cur, "kqv_out", il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + } + + // re-add the layer input + cur = ggml_add(ctx0, cur, inpL); + + ggml_tensor * ffn_inp = cur; + cb(ffn_inp, "ffn_inp", il); + + // pre-norm + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + // feed-forward network + cur = build_ffn(cur, + model.layers[il].ffn_up, + NULL, NULL, NULL, NULL, NULL, + model.layers[il].ffn_down, + NULL, NULL, NULL, + LLM_FFN_SWIGLU, LLM_FFN_SEQ, il); + + // attentions bypass the intermediate layer + cur = ggml_add(ctx0, cur, ffn_inp); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm_enc, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_embd", -1); + res->t_embd = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + struct llm_build_bloom : public llm_graph_context { llm_build_bloom(const 
llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { const int64_t n_embd_head = hparams.n_embd_head_v; @@ -6094,6 +7186,8 @@ struct llm_build_bloom : public llm_graph_context { LLM_NORM, -1); cb(inpL, "inp_norm", -1); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { cur = build_norm(inpL, model.layers[il].attn_norm, @@ -6126,9 +7220,7 @@ struct llm_build_bloom : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -6205,6 +7297,8 @@ struct llm_build_mpt : public llm_graph_context { cb(inpL, "inpL", -1); } + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * attn_norm; @@ -6231,8 +7325,8 @@ struct llm_build_mpt : public llm_graph_context { cb(cur, "wqkv_clamped", il); } - ggml_tensor * Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + ggml_tensor * Qcur = ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd)); + ggml_tensor * Kcur = ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd)); ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); cb(Qcur, "Qcur", il); @@ -6252,6 +7346,12 @@ struct llm_build_mpt : public llm_graph_context { model.layers[il].attn_k_norm_b, LLM_NORM, il); cb(Kcur, "Kcur", il); + } else { + Qcur = ggml_cont(ctx0, Qcur); + cb(Qcur, "Qcur", il); + + Kcur = ggml_cont(ctx0, Kcur); + 
cb(Kcur, "Kcur", il); } Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); @@ -6267,9 +7367,7 @@ struct llm_build_mpt : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -6338,6 +7436,8 @@ struct llm_build_stablelm : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { // norm cur = build_norm(inpL, @@ -6413,9 +7513,7 @@ struct llm_build_stablelm : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); @@ -6490,6 +7588,8 @@ struct llm_build_qwen : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -6506,12 +7606,10 @@ struct llm_build_qwen : public llm_graph_context { cur = ggml_add(ctx0, cur, model.layers[il].bqkv); cb(cur, "bqkv", il); - ggml_tensor * Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + ggml_tensor * Kcur = 
ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 2*sizeof(float)*(n_embd))); - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); // using mode = 2 for neox mode @@ -6536,9 +7634,7 @@ struct llm_build_qwen : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -6607,6 +7703,8 @@ struct llm_build_qwen2 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -6656,9 +7754,7 @@ struct llm_build_qwen2 : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -6701,6 +7797,10 @@ struct llm_build_qwen2 : public llm_graph_context { // lm_head cur = build_lora_mm(model.output, cur); + if (model.output_b != nullptr) { + cur = ggml_add(ctx0, cur, model.output_b); + } + cb(cur, "result_output", -1); res->t_logits = cur; @@ -6708,8 +7808,10 @@ struct llm_build_qwen2 : public llm_graph_context { } }; -struct llm_build_qwen2vl : public llm_graph_context { - llm_build_qwen2vl(const llama_model & model, const 
llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { +struct llm_build_dream : public llm_graph_context { + llm_build_dream(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : + llm_graph_context(params) { + //copied from qwen2 const int64_t n_embd_head = hparams.n_embd_head_v; GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); @@ -6723,11 +7825,114 @@ struct llm_build_qwen2vl : public llm_graph_context { // inp_pos - contains the positions ggml_tensor * inp_pos = build_inp_pos(); - auto * inp_attn = build_attn_inp_kv_unified(); + auto * inp_attn = build_attn_inp_no_cache(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + // norm + cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + // self-attention + { + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + Qcur = ggml_rope_ext(ctx0, Qcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow); + + Kcur = ggml_rope_ext(ctx0, Kcur, inp_pos, nullptr, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = build_attn(inp_attn, 
gf, model.layers[il].wo, model.layers[il].bo, Qcur, Kcur, Vcur, nullptr, + nullptr, 1.0f / sqrtf(float(n_embd_head)), il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, model.layers[il].ffn_up, NULL, NULL, model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, NULL, LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_qwen2vl : public llm_graph_context { + llm_build_qwen2vl(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); int sections[4]; std::copy(std::begin(hparams.rope_sections), std::begin(hparams.rope_sections) + 4, sections); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -6777,9 +7982,7 @@ struct llm_build_qwen2vl : public 
llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -6846,6 +8049,8 @@ struct llm_build_qwen2moe : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -6904,9 +8109,7 @@ struct llm_build_qwen2moe : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -7005,6 +8208,8 @@ struct llm_build_qwen3 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -7057,9 +8262,7 @@ struct llm_build_qwen3 : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -7126,6 +8329,8 @@ struct llm_build_qwen3moe : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -7178,9 +8383,7 @@ struct llm_build_qwen3moe : public llm_graph_context { Qcur, Kcur, 
Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -7256,6 +8459,8 @@ struct llm_build_phi2 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { attn_norm_output = build_norm(inpL, model.layers[il].attn_norm, @@ -7276,21 +8481,21 @@ struct llm_build_phi2 : public llm_graph_context { cur = ggml_add(ctx0, cur, model.layers[il].bqkv); cb(cur, "bqkv", il); - Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); } else { Qcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wq, attn_norm_output), model.layers[il].bq); Kcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wk, attn_norm_output), model.layers[il].bk); Vcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wv, attn_norm_output), model.layers[il].bv); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); } cb(Qcur, "Qcur", il); cb(Kcur, "Kcur", il); cb(Vcur, "Vcur", il); - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, 
n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); Qcur = ggml_rope_ext( @@ -7318,9 +8523,7 @@ struct llm_build_phi2 : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); attn_norm_output = ggml_get_rows(ctx0, attn_norm_output, inp_out_ids); @@ -7392,6 +8595,8 @@ struct llm_build_phi3 : public llm_graph_context { inp_attn = build_attn_inp_kv_unified(); } + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { auto * residual = inpL; @@ -7414,21 +8619,21 @@ struct llm_build_phi3 : public llm_graph_context { cur = build_lora_mm(model.layers[il].wqkv, attn_norm_output); cb(cur, "wqkv", il); - Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0 * sizeof(float) * (n_embd))); - Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1 * sizeof(float) * (n_embd))); + Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head * sizeof(float), cur->nb[1], 0 * sizeof(float) * (n_embd)); + Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head * sizeof(float), cur->nb[1], 1 * sizeof(float) * (n_embd)); Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1 * sizeof(float) * (n_embd + n_embd_gqa))); } else { Qcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wq, attn_norm_output), model.layers[il].bq); Kcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wk, attn_norm_output), model.layers[il].bk); Vcur = ggml_add(ctx0, build_lora_mm(model.layers[il].wv, attn_norm_output), model.layers[il].bv); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, 
n_head_kv, n_tokens); } cb(Qcur, "Qcur", il); cb(Kcur, "Kcur", il); cb(Vcur, "Vcur", il); - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); Qcur = ggml_rope_ext( @@ -7455,9 +8660,7 @@ struct llm_build_phi3 : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor* inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); residual = ggml_get_rows(ctx0, residual, inp_out_ids); } @@ -7543,15 +8746,16 @@ struct llm_build_plamo : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); - for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { // norm cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il); cb(cur, "attn_norm", il); - ggml_tensor * attention_norm = cur; + ggml_tensor * sa_inp = cur; // self-attention { @@ -7589,18 +8793,17 @@ struct llm_build_plamo : public llm_graph_context { model.layers[il].wo, NULL, Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - ggml_tensor * sa_out = cur; - - cur = attention_norm; - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); - sa_out = ggml_get_rows(ctx0, sa_out, inp_out_ids); + sa_inp = ggml_get_rows(ctx0, sa_inp, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } + ggml_tensor * sa_out = cur; + + cur = sa_inp; + // feed-forward network { cur = build_ffn(cur, @@ -7665,6 +8868,8 @@ struct llm_build_gpt2 : public llm_graph_context { inpL = ggml_add(ctx0, inpL, pos); cb(inpL, "inpL", -1); + 
ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { cur = build_norm(inpL, model.layers[il].attn_norm, @@ -7697,9 +8902,7 @@ struct llm_build_gpt2 : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -7769,6 +8972,8 @@ struct llm_build_codeshell : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { cur = build_norm(inpL, model.layers[il].attn_norm, @@ -7784,12 +8989,10 @@ struct llm_build_codeshell : public llm_graph_context { cur = ggml_add(ctx0, cur, model.layers[il].bqkv); cb(cur, "bqkv", il); - ggml_tensor * Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); Qcur = ggml_rope_ext( @@ -7813,9 +9016,7 @@ struct llm_build_codeshell : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 
1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -7869,133 +9070,6 @@ struct llm_build_codeshell : public llm_graph_context { struct llm_build_orion : public llm_graph_context { llm_build_orion(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { - const int64_t n_embd_head = hparams.n_embd_head_v; - - GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); - GGML_ASSERT(n_embd_head == hparams.n_rot); - - ggml_tensor * cur; - ggml_tensor * inpL; - - inpL = build_inp_embd(model.tok_embd); - - // inp_pos - contains the positions - ggml_tensor * inp_pos = build_inp_pos(); - - auto * inp_attn = build_attn_inp_kv_unified(); - - for (int il = 0; il < n_layer; ++il) { - ggml_tensor * inpSA = inpL; - - // norm - cur = build_norm(inpL, - model.layers[il].attn_norm, model.layers[il].attn_norm_b, - LLM_NORM, il); - cb(cur, "attn_norm", il); - - // self-attention - { - // compute Q and K and RoPE them - ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); - cb(Qcur, "Qcur", il); - // if (model.layers[il].bq) { - // Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); - // cb(Qcur, "Qcur", il); - // } - - ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); - cb(Kcur, "Kcur", il); - // if (model.layers[il].bk) { - // Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); - // cb(Kcur, "Kcur", il); - // } - - ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); - cb(Vcur, "Vcur", il); - // if (model.layers[il].bv) { - // Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); - // cb(Vcur, "Vcur", il); - // } - - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); - Vcur = 
ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); - - Qcur = ggml_rope_ext( - ctx0, Qcur, inp_pos, nullptr, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); - - Kcur = ggml_rope_ext( - ctx0, Kcur, inp_pos, nullptr, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); - - cb(Qcur, "Qcur", il); - cb(Kcur, "Kcur", il); - cb(Vcur, "Vcur", il); - - cur = build_attn(inp_attn, gf, - model.layers[il].wo, NULL, - Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); - } - - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); - } - - ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); - cb(ffn_inp, "ffn_inp", il); - - // feed-forward network - cur = build_norm(ffn_inp, - model.layers[il].ffn_norm, model.layers[il].ffn_norm_b, - LLM_NORM, il); - cb(cur, "ffn_norm", il); - - cur = build_ffn(cur, - model.layers[il].ffn_up, NULL, NULL, - model.layers[il].ffn_gate, NULL, NULL, - model.layers[il].ffn_down, NULL, NULL, - NULL, - LLM_FFN_SILU, LLM_FFN_PAR, il); - cb(cur, "ffn_out", il); - - cur = ggml_add(ctx0, cur, ffn_inp); - - cur = build_cvec(cur, il); - cb(cur, "l_out", il); - - // input for next layer - inpL = cur; - } - - cur = inpL; - - cur = build_norm(cur, - model.output_norm, model.output_norm_b, - LLM_NORM, -1); - - cb(cur, "result_norm", -1); - res->t_embd = cur; - - // lm_head - cur = build_lora_mm(model.output, cur); - - cb(cur, "result_output", -1); - res->t_logits = cur; - - ggml_build_forward_expand(gf, cur); - } -}; - -struct llm_build_internlm2 : public llm_graph_context { - llm_build_internlm2(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { const int64_t n_embd_head = hparams.n_embd_head_v; 
GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); @@ -8011,13 +9085,15 @@ struct llm_build_internlm2 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; // norm cur = build_norm(inpL, - model.layers[il].attn_norm, NULL, - LLM_NORM_RMS, il); + model.layers[il].attn_norm, model.layers[il].attn_norm_b, + LLM_NORM, il); cb(cur, "attn_norm", il); // self-attention @@ -8025,24 +9101,24 @@ struct llm_build_internlm2 : public llm_graph_context { // compute Q and K and RoPE them ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); cb(Qcur, "Qcur", il); - if (model.layers[il].bq) { - Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); - cb(Qcur, "Qcur", il); - } + // if (model.layers[il].bq) { + // Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + // cb(Qcur, "Qcur", il); + // } ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); cb(Kcur, "Kcur", il); - if (model.layers[il].bk) { - Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); - cb(Kcur, "Kcur", il); - } + // if (model.layers[il].bk) { + // Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + // cb(Kcur, "Kcur", il); + // } ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); cb(Vcur, "Vcur", il); - if (model.layers[il].bv) { - Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); - cb(Vcur, "Vcur", il); - } + // if (model.layers[il].bv) { + // Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + // cb(Vcur, "Vcur", il); + // } Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); @@ -8065,13 +9141,11 @@ struct llm_build_internlm2 : public llm_graph_context { cb(Vcur, "Vcur", il); cur = build_attn(inp_attn, gf, - model.layers[il].wo, model.layers[il].bo, + model.layers[il].wo, NULL, Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer 
- 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -8081,8 +9155,8 @@ struct llm_build_internlm2 : public llm_graph_context { // feed-forward network cur = build_norm(ffn_inp, - model.layers[il].ffn_norm, NULL, - LLM_NORM_RMS, il); + model.layers[il].ffn_norm, model.layers[il].ffn_norm_b, + LLM_NORM, il); cb(cur, "ffn_norm", il); cur = build_ffn(cur, @@ -8105,8 +9179,8 @@ struct llm_build_internlm2 : public llm_graph_context { cur = inpL; cur = build_norm(cur, - model.output_norm, NULL, - LLM_NORM_RMS, -1); + model.output_norm, model.output_norm_b, + LLM_NORM, -1); cb(cur, "result_norm", -1); res->t_embd = cur; @@ -8121,17 +9195,144 @@ struct llm_build_internlm2 : public llm_graph_context { } }; -struct llm_build_minicpm3 : public llm_graph_context { - llm_build_minicpm3(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { - //TODO: if the model varies, these parameters need to be read from the model - const int64_t n_embd_base = 256; - const float scale_embd = 12.0f; - const float scale_depth = 1.4f; - const float kq_scale = 1.0f / sqrtf(float(hparams.n_embd_head_k)); +struct llm_build_internlm2 : public llm_graph_context { + llm_build_internlm2(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; - const uint32_t n_embd_head_qk_rope = hparams.n_rot; - const uint32_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot; - const uint32_t kv_lora_rank = hparams.n_lora_kv; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * 
inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + // norm + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + // self-attention + { + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // 
feed-forward network + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_minicpm3 : public llm_graph_context { + llm_build_minicpm3(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + //TODO: if the model varies, these parameters need to be read from the model + const int64_t n_embd_base = 256; + const float scale_embd = 12.0f; + const float scale_depth = 1.4f; + const float kq_scale = 1.0f / sqrtf(float(hparams.n_embd_head_k)); + + const uint32_t n_embd_head_qk_rope = hparams.n_rot; + const uint32_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot; + const uint32_t kv_lora_rank = hparams.n_lora_kv; ggml_tensor * cur; ggml_tensor * inpL; @@ -8147,6 +9348,8 @@ struct llm_build_minicpm3 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -8205,8 +9408,6 @@ struct llm_build_minicpm3 : public llm_graph_context { ggml_row_size(kv_pe_compresseed->type, kv_lora_rank)); cb(k_pe, "k_pe", il); - // TODO: the CUDA backend used to not support non-cont. 
(RMS) norm, investigate removing ggml_cont - kv_compressed = ggml_cont(ctx0, kv_compressed); kv_compressed = build_norm(kv_compressed, model.layers[il].attn_kv_a_norm, NULL, LLM_NORM_RMS, il); @@ -8233,12 +9434,6 @@ struct llm_build_minicpm3 : public llm_graph_context { v_states = ggml_cont(ctx0, v_states); cb(v_states, "v_states", il); - v_states = ggml_view_2d(ctx0, v_states, hparams.n_embd_head_v * n_head, n_tokens, - ggml_row_size(kv->type, hparams.n_embd_head_v * n_head), - 0); - cb(v_states, "v_states", il); - - q_pe = ggml_cont(ctx0, q_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this q_pe = ggml_rope_ext( ctx0, q_pe, inp_pos, rope_factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, @@ -8247,7 +9442,6 @@ struct llm_build_minicpm3 : public llm_graph_context { cb(q_pe, "q_pe", il); // shared RoPE key - k_pe = ggml_cont(ctx0, k_pe); // TODO: the CUDA backend used to not support non-cont. RoPE, investigate removing this k_pe = ggml_rope_ext( ctx0, k_pe, inp_pos, rope_factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, @@ -8266,15 +9460,13 @@ struct llm_build_minicpm3 : public llm_graph_context { q_states, k_states, v_states, nullptr, nullptr, kq_scale, il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } // scale_res - scale the hidden states for residual connection - const float scale_res = scale_depth/sqrtf(float(n_layer)); + const float scale_res = scale_depth/sqrtf(float(n_layer)); // TODO: is this correct? 
cur = ggml_scale(ctx0, cur, scale_res); cb(cur, "hidden_scaled", il); @@ -8351,6 +9543,8 @@ struct llm_build_gemma : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { // norm cur = build_norm(inpL, @@ -8396,9 +9590,7 @@ struct llm_build_gemma : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -8467,6 +9659,8 @@ struct llm_build_gemma2_iswa : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified_iswa(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { // norm cur = build_norm(inpL, @@ -8511,18 +9705,16 @@ struct llm_build_gemma2_iswa : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il); } + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + } + cur = build_norm(cur, model.layers[il].attn_post_norm, NULL, LLM_NORM_RMS, il); cb(cur, "attn_post_norm", il); - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); - } - ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL); cb(sa_out, "sa_out", il); @@ -8601,6 +9793,8 @@ struct llm_build_gemma3_iswa : public llm_graph_context { // TODO: is causal == true correct? 
might need some changes auto * inp_attn = build_attn_inp_kv_unified_iswa(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { const float freq_base_l = model.get_rope_freq_base (cparams, il); const float freq_scale_l = model.get_rope_freq_scale(cparams, il); @@ -8653,18 +9847,16 @@ struct llm_build_gemma3_iswa : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f, il); } + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + } + cur = build_norm(cur, model.layers[il].attn_post_norm, NULL, LLM_NORM_RMS, il); cb(cur, "attn_post_norm", il); - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); - } - ggml_tensor * sa_out = ggml_add(ctx0, cur, inpL); cb(sa_out, "sa_out", il); @@ -8717,109 +9909,219 @@ struct llm_build_gemma3_iswa : public llm_graph_context { } }; -// TODO: move up next to build_starcoder -struct llm_build_starcoder2 : public llm_graph_context { - llm_build_starcoder2(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { - const int64_t n_embd_head = hparams.n_embd_head_v; - - GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); - GGML_ASSERT(n_embd_head == hparams.n_rot); - +struct llm_build_gemma3n_iswa : public llm_graph_context { + const llama_model & model; + ggml_cgraph * gf; + + const int64_t n_embd_head; + const int64_t n_embd_altup; + const int64_t n_altup; + const int i_altup_act; + const int n_layer_kv = 20; // number of layers having KV [KV_REUSE] + const int n_layer_sparsity = 10; // number of layers using activation sparsity + const float f_sparsity_std_mul = 1.6448533535003662f; // std_multiplier = normal_dist.icdf(0.95) + + llm_build_gemma3n_iswa(const llama_model & model, const 
llm_graph_params & params, ggml_cgraph * gf) + : llm_graph_context(params), + model(model), + gf(gf), + n_embd_head(model.hparams.n_embd_head_k), + n_embd_altup(model.hparams.n_embd_altup), + n_altup(model.hparams.n_altup), + i_altup_act(model.hparams.i_altup_act) { ggml_tensor * cur; ggml_tensor * inpL; inpL = build_inp_embd(model.tok_embd); + // important: do not normalize weights for raw embeddings input (i.e. encoded image emdeddings) + if (ubatch.token) { + inpL = ggml_scale(ctx0, inpL, sqrtf(n_embd)); + cb(inpL, "inp_scaled", -1); + } + // inp_pos - contains the positions ggml_tensor * inp_pos = build_inp_pos(); - auto * inp_attn = build_attn_inp_kv_unified(); + // TODO: is causal == true correct? might need some changes + auto * inp_attn = build_attn_inp_kv_unified_iswa(); + + // inp_per_layer shape: [n_embd_altup, n_tokens, n_layer] + ggml_tensor * inp_per_layer = project_per_layer_inputs(inpL, get_per_layer_inputs()); + + // inpL now has only 1 altup, project it to the rest of the altups + // these "added" altups will be concat to the last dim of inpL + { + ggml_tensor * target_magnitude = calc_magnitude(inpL); + ggml_tensor * inp_repeated = ggml_repeat_4d(ctx0, inpL, n_embd, n_tokens, n_altup - 1, 1); + ggml_tensor * altup_added = ggml_mul_mat(ctx0, model.altup_proj, inp_repeated); // shape: [n_embd, n_tokens, n_altup - 1] + ggml_tensor * new_magnitude = calc_magnitude(altup_added); + altup_added = ggml_div(ctx0, + ggml_mul(ctx0, altup_added, target_magnitude), + new_magnitude); + inpL = ggml_concat(ctx0, inpL, altup_added, 2); // shape: [n_embd, n_tokens, n_altup] + cb(inpL, "inp_stacked", -1); + } + + // inpL now has shape: [n_embd, n_tokens, n_altup] + // inp_per_layer now has shape: [n_embd_altup, n_tokens, n_layer] for (int il = 0; il < n_layer; ++il) { - ggml_tensor * inpSA = inpL; + // this block is made to be closely resemble Gemma3p5DecoderLayer on python code + const bool has_kv = (il < n_layer_kv); + + const float freq_base_l = 
model.get_rope_freq_base (cparams, il); + const float freq_scale_l = model.get_rope_freq_scale(cparams, il); + + ggml_tensor * cur = inpL; // [n_embd, n_tokens, n_altup] + ggml_tensor * predictions = altup_predict(cur, il); // [n_embd, n_tokens, n_altup] + + // predicted value will go through self-attention and laurel + ggml_tensor * active_prediction = view_2d_slice(predictions, i_altup_act); // [n_embd, n_tokens] + cur = active_prediction; + cb(cur, "active_prediction", il); // norm - cur = build_norm(inpL, - model.layers[il].attn_norm, model.layers[il].attn_norm_b, - LLM_NORM, il); + cur = build_norm(cur, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il); cb(cur, "attn_norm", il); + // laurel + ggml_tensor * laurel_out = laurel(cur, il); // [n_embd, n_tokens] + // self-attention - { + if (has_kv) { // compute Q and K and RoPE them ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); cb(Qcur, "Qcur", il); - if (model.layers[il].bq) { - Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); - cb(Qcur, "Qcur", il); - } ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); cb(Kcur, "Kcur", il); - if (model.layers[il].bk) { - Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); - cb(Kcur, "Kcur", il); - } ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); cb(Vcur, "Vcur", il); - if (model.layers[il].bv) { - Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); - cb(Vcur, "Vcur", il); - } Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, il); + Kcur = build_norm(Kcur, model.layers[il].attn_k_norm, NULL, LLM_NORM_RMS, il); + Vcur = ggml_rms_norm(ctx0, Vcur, hparams.f_norm_rms_eps); + + cb(Qcur, "Qcur_normed", il); + cb(Kcur, "Kcur_normed", il); + cb(Vcur, "Vcur_normed", il); + Qcur = ggml_rope_ext( ctx0, Qcur, inp_pos, 
nullptr, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); + n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l, + ext_factor, attn_factor, beta_fast, beta_slow); Kcur = ggml_rope_ext( ctx0, Kcur, inp_pos, nullptr, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); + n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l, + ext_factor, attn_factor, beta_fast, beta_slow); - cb(Qcur, "Qcur", il); - cb(Kcur, "Kcur", il); - cb(Vcur, "Vcur", il); + cb(Qcur, "Qcur_pos", il); + cb(Kcur, "Kcur_pos", il); cur = build_attn(inp_attn, gf, - model.layers[il].wo, model.layers[il].bo, - Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); - } + model.layers[il].wo, NULL, + Qcur, Kcur, Vcur, nullptr, nullptr, hparams.f_attention_scale, il); + } else { + // no KV layers + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); - } + Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, il); + cb(Qcur, "Qcur_normed", il); - ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); - cb(ffn_inp, "ffn_inp", il); + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base_l, freq_scale_l, + ext_factor, attn_factor, beta_fast, beta_slow); + cb(Qcur, "Qcur_pos", il); - // feed-forward network + cur = build_attn(inp_attn, gf, + model.layers[il].wo, NULL, + Qcur, nullptr, nullptr, nullptr, nullptr, hparams.f_attention_scale, il); + } - cur = build_norm(ffn_inp, - model.layers[il].ffn_norm, model.layers[il].ffn_norm_b, - LLM_NORM, il); + cur = build_norm(cur, + 
model.layers[il].attn_post_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_post_norm", il); + + cur = ggml_add(ctx0, cur, active_prediction); // [n_embd, n_tokens] + cb(cur, "attn_gated", il); + + ggml_tensor * attn_laurel = ggml_scale(ctx0, + ggml_add(ctx0, cur, laurel_out), + 1.0f / sqrtf(2.0f)); // [n_embd, n_tokens] + cb(attn_laurel, "attn_laurel", il); + + cur = build_norm(attn_laurel, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); cb(cur, "ffn_norm", il); - cur = build_ffn(cur, - model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL, - NULL, NULL, NULL, - model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, - NULL, - LLM_FFN_GELU, LLM_FFN_SEQ, il); - cb(cur, "ffn_out", il); + // feed-forward network + { + ggml_tensor * up_proj = build_lora_mm(model.layers[il].ffn_up, cur); + ggml_tensor * gate_proj = build_lora_mm(model.layers[il].ffn_gate, cur); - cur = ggml_add(ctx0, cur, ffn_inp); + if (il < n_layer_sparsity) { + // apply activation sparsity + gate_proj = gaussian_topk(gate_proj); + } + gate_proj = ggml_gelu(ctx0, gate_proj); + + cur = ggml_mul(ctx0, up_proj, gate_proj); + cur = build_lora_mm(model.layers[il].ffn_down, cur); + cb(cur, "ffn_out", il); + } + + cur = build_norm(cur, + model.layers[il].ffn_post_norm, NULL, + LLM_NORM_RMS, -1); + cb(cur, "ffn_post_norm", il); + + ggml_tensor * attn_ffw_laurel_gated = ggml_add(ctx0, cur, attn_laurel); // [n_embd, n_tokens] + cb(attn_ffw_laurel_gated, "attn_ffw_laurel_gated", il); + + ggml_tensor * corrected = altup_correct(predictions, attn_ffw_laurel_gated, il); // [n_embd, n_tokens, n_altup] + + ggml_tensor * first_prediction; // [n_embd, n_tokens] + { + first_prediction = view_2d_slice(corrected, i_altup_act); // [n_embd, n_tokens] + first_prediction = ggml_mul(ctx0, first_prediction, model.layers[il].altup_correct_scale); + first_prediction = build_lora_mm(model.layers[il].per_layer_inp_gate, first_prediction); + first_prediction = ggml_gelu(ctx0, first_prediction); // [n_embd_altup, 
n_tokens] + cb(first_prediction, "first_prediction_gated", il); + ggml_tensor * inp_this_layer = view_2d_slice(inp_per_layer, il); // [n_embd_altup, n_tokens] + first_prediction = ggml_mul(ctx0, first_prediction, inp_this_layer); // [n_embd_altup, n_tokens] + cb(first_prediction, "first_prediction_scaled", il); + + first_prediction = build_lora_mm(model.layers[il].per_layer_proj, first_prediction); // [n_embd, n_tokens] + first_prediction = build_norm(first_prediction, + model.layers[il].per_layer_post_norm, NULL, + LLM_NORM_RMS, il); + cb(first_prediction, "first_prediction_out", il); + } + + // equivalent to python code: corrected_predictions[1:] += first_prediction + { + ggml_tensor * slice_first = view_2d_slice(corrected, 0); + ggml_tensor * slice_rest = ggml_view_3d(ctx0, corrected, n_embd, n_tokens, n_altup - 1, + ggml_row_size(corrected->type, n_embd), + ggml_row_size(corrected->type, n_embd*n_tokens), + n_embd*n_tokens*ggml_element_size(corrected)); + ggml_tensor * tmp = ggml_add(ctx0, slice_rest, first_prediction); // [n_embd, n_tokens, n_altup - 1] + corrected = ggml_concat(ctx0, slice_first, tmp, 2); // [n_embd, n_tokens, n_altup] + } + cur = corrected; // [n_embd, n_tokens, n_altup] cur = build_cvec(cur, il); cb(cur, "l_out", il); @@ -8827,55 +10129,314 @@ struct llm_build_starcoder2 : public llm_graph_context { inpL = cur; } - cur = inpL; + cur = inpL; // [n_embd, n_tokens, n_altup] + + // cur now has multiple altup(s), we want to merge them back to 1 altup + { + ggml_tensor * target_magnitude = calc_magnitude(view_2d_slice(cur, i_altup_act)); // [n_embd, n_tokens] + // do a view to skip the first slice (active altup) + ggml_tensor * alt_slice = ggml_view_3d(ctx0, cur, n_embd, n_tokens, n_altup - 1, + ggml_row_size(cur->type, n_embd), + ggml_row_size(cur->type, n_embd*n_tokens), + n_embd*n_tokens*ggml_element_size(cur)); + ggml_tensor * altup_unembd = ggml_mul_mat(ctx0, model.altup_unembd_proj, alt_slice); // shape: [n_embd, n_tokens, n_altup - 1] + 
ggml_tensor * new_magnitude = calc_magnitude(altup_unembd); + altup_unembd = ggml_div(ctx0, + ggml_mul(ctx0, altup_unembd, target_magnitude), + new_magnitude); + cb(altup_unembd, "altup_unembd", -1); + + // equivalent to torch.mean(hidden_states, dim=0) + cur = view_2d_slice(cur, 0); // [n_embd, n_tokens] + for (int i = 0; i < n_altup - 1; ++i) { + cur = ggml_add(ctx0, cur, view_2d_slice(altup_unembd, i)); + } + cur = ggml_scale(ctx0, cur, 1.0f / float(n_altup)); // [n_embd, n_tokens] + cb(cur, "unembd_merged", -1); + } + + // cur now has shape: [n_embd, n_tokens] + + // TODO: move this to right after the last KV layer + { + // skip computing output for unused tokens + ggml_tensor * inp_out_ids = build_inp_out_ids(); + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + } cur = build_norm(cur, - model.output_norm, model.output_norm_b, - LLM_NORM, -1); + model.output_norm, NULL, + LLM_NORM_RMS, -1); cb(cur, "result_norm", -1); res->t_embd = cur; - // lm_head cur = build_lora_mm(model.output, cur); + { + // final logit soft-capping + cur = ggml_scale(ctx0, cur, 1.0f / hparams.f_final_logit_softcapping); + cur = ggml_tanh(ctx0, cur); + cur = ggml_scale(ctx0, cur, hparams.f_final_logit_softcapping); + } + cb(cur, "result_output", -1); res->t_logits = cur; ggml_build_forward_expand(gf, cur); } + + ggml_tensor * calc_magnitude(ggml_tensor * x) { + return ggml_sqrt(ctx0, ggml_sum_rows(ctx0, ggml_sqr(ctx0, x))); + } + + // get 2D slice view from a 3D tensor, the idx corresponds to the 3rd dim + ggml_tensor * view_2d_slice(ggml_tensor * x, int idx) { + GGML_ASSERT(idx < (int)x->ne[2]); + return ggml_view_2d(ctx0, x, x->ne[0], x->ne[1], + ggml_row_size(x->type, x->ne[0]), + idx * x->ne[0] * x->ne[1] * ggml_element_size(x)); + } + + // equivalent to get_per_layer_inputs() in python code + // output shape: [n_embd_altup, n_layer, n_tokens] + ggml_tensor * get_per_layer_inputs() { + auto inp = std::make_unique(); + ggml_tensor * inp_per_layer; + if (ubatch.token) { + inp->tokens = 
ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, ubatch.n_tokens); + ggml_set_input(inp->tokens); + res->t_tokens = inp->tokens; + inp_per_layer = ggml_get_rows(ctx0, model.tok_embd_per_layer, inp->tokens); + inp_per_layer = ggml_reshape_3d(ctx0, inp_per_layer, n_embd_altup, n_layer, n_tokens); + inp_per_layer = ggml_scale(ctx0, inp_per_layer, sqrtf((float)n_embd_altup)); + cb(inp_per_layer, "inp_per_layer_selected", -1); + } else { + GGML_ABORT("TODO: support embd input"); + } + res->add_input(std::move(inp)); + return inp_per_layer; + } + + // equivalent to project_per_layer_inputs() in python code + // this calculates the per-layer inputs, so the final tensor shape will have n_layer as the last dim + // output shape: [n_embd_altup, n_tokens, n_layer] + ggml_tensor * project_per_layer_inputs(ggml_tensor * inputs_embeds, ggml_tensor * inp_per_layer) { + const float per_layer_projection_scale = 1.0f / sqrtf((float)n_embd); + const float per_layer_input_scale = 1.0f / sqrtf(2.0f); + + ggml_tensor * per_layer_proj = ggml_mul_mat(ctx0, model.per_layer_model_proj, inputs_embeds); + per_layer_proj = ggml_scale(ctx0, per_layer_proj, per_layer_projection_scale); + per_layer_proj = ggml_reshape_3d(ctx0, per_layer_proj, n_embd_altup, n_layer, n_tokens); + per_layer_proj = build_norm(per_layer_proj, + model.per_layer_proj_norm, NULL, + LLM_NORM_RMS, -1); // [n_embd_altup, n_layer, n_tokens] + cb(per_layer_proj, "per_layer_proj", -1); + + inp_per_layer = ggml_add(ctx0, inp_per_layer, per_layer_proj); + inp_per_layer = ggml_scale(ctx0, inp_per_layer, per_layer_input_scale); + cb(inp_per_layer, "inp_per_layer", -1); + + // permute to shape: [n_embd_altup, n_tokens, n_layer] + inp_per_layer = ggml_cont(ctx0, ggml_permute(ctx0, inp_per_layer, 0, 2, 1, 3)); + return inp_per_layer; + } + + // input cur shape: [n_altup, n_tokens] + // output shape: [n_altup, n_tokens] + ggml_tensor * laurel(ggml_tensor * cur, int il) { + ggml_tensor * tmp = cur; + tmp = 
build_lora_mm(model.layers[il].laurel_l, tmp); + tmp = build_lora_mm(model.layers[il].laurel_r, tmp); + tmp = build_norm(tmp, model.layers[il].laurel_post_norm, NULL, LLM_NORM_RMS, il); + tmp = ggml_add(ctx0, tmp, cur); + cb(tmp, "laurel_out", il); + return tmp; + } + + // input x shape: [n_embd, n_tokens] + // output shape: [n_embd, n_tokens] + ggml_tensor * gaussian_topk(ggml_tensor * x) { + ggml_tensor * mean = ggml_mean(ctx0, x); + ggml_tensor * std = ggml_sqrt(ctx0, ggml_scale(ctx0, + ggml_sum_rows(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x, mean))), + 1.0f / (float)(x->ne[0] - 1) + )); + ggml_tensor * cutoff_x = ggml_add(ctx0, mean, ggml_scale(ctx0, std, f_sparsity_std_mul)); + return ggml_relu(ctx0, ggml_sub(ctx0, x, cutoff_x)); + } + + // + // altup functions + // + + // equivalent to compute_router_modalities() in python code + // input x shape: [n_embd, n_tokens] + // output shape: [n_altup, n_tokens] + ggml_tensor * altup_compute_router_modalities(ggml_tensor * x, int il) { + ggml_tensor * router_inputs = build_norm(x, + model.layers[il].altup_router_norm, NULL, + LLM_NORM_RMS, il); + + // router_input_scale + router_inputs = ggml_scale(ctx0, router_inputs, 1.0f / (float)n_embd); + + ggml_tensor * output = ggml_mul_mat(ctx0, model.layers[il].altup_router, router_inputs); + return ggml_tanh(ctx0, output); // [n_altup, n_tokens] + } + + // input cur shape: [n_embd, n_tokens, n_altup] + // output shape: [n_embd, n_tokens, n_altup] + ggml_tensor * altup_predict(ggml_tensor * cur, int il) { + ggml_tensor * activated = view_2d_slice(cur, i_altup_act); // [n_embd, n_tokens] + ggml_tensor * modalities = altup_compute_router_modalities(activated, il); // [n_altup, n_tokens] + cb(modalities, "modalities", il); + + ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_predict_coef, modalities); + cb(all_coefs, "all_coefs", il); + // first dim now having n_altup^2 elements, we reshape it to 2D (so we end up with 3D tensor) + all_coefs = ggml_reshape_3d(ctx0, 
all_coefs, n_altup, n_altup, n_tokens); + + // permute to [n_altup, n_embd, n_tokens] + ggml_tensor * cur_permuted = ggml_cont(ctx0, ggml_permute(ctx0, cur, 1, 2, 0, 3)); + ggml_tensor * predictions = ggml_mul_mat(ctx0, cur_permuted, all_coefs); // [n_altup, n_embd, n_tokens] + + // final shape must be the same as cur: [n_embd, n_tokens, n_altup] + predictions = ggml_cont(ctx0, ggml_permute(ctx0, predictions, 0, 2, 1, 3)); + predictions = ggml_add(ctx0, predictions, cur); + cb(predictions, "predictions", il); + + return predictions; + } + + // input predictions shape: [n_embd, n_tokens, n_altup] + // input activated shape: [n_embd, n_tokens] + // output shape: [n_embd, n_tokens, n_altup] + ggml_tensor * altup_correct(ggml_tensor * predictions, ggml_tensor * activated, int il) { + ggml_tensor * modalities = altup_compute_router_modalities(activated, il); // [n_altup, n_tokens] + cb(modalities, "modalities", il); + + ggml_tensor * active_prediction = view_2d_slice(predictions, i_altup_act); + ggml_tensor * innovation = ggml_sub(ctx0, activated, active_prediction); // [n_embd, n_tokens] + cb(innovation, "innovation", il); + + ggml_tensor * all_coefs = build_lora_mm(model.layers[il].altup_correct_coef, modalities); // [n_altup, n_tokens] + all_coefs = ggml_scale_bias(ctx0, all_coefs, 1.0f, 1.0f); // + 1.0 + cb(all_coefs, "all_coefs", il); + all_coefs = ggml_cont(ctx0, ggml_transpose(ctx0, all_coefs)); // [n_tokens, n_altup] + all_coefs = ggml_reshape_3d(ctx0, all_coefs, 1, n_tokens, n_altup); // [1, n_tokens, n_altup] + + innovation = ggml_repeat_4d(ctx0, innovation, n_embd, n_tokens, n_altup, 1); + ggml_tensor * corrected = ggml_mul(ctx0, innovation, all_coefs); // [n_embd, n_tokens, n_altup] + corrected = ggml_add(ctx0, corrected, predictions); // [n_embd, n_tokens, n_altup] + cb(corrected, "corrected", il); + + return corrected; + } }; -struct llm_build_mamba : public llm_graph_context { - const llama_model & model; +// TODO: move up next to build_starcoder +struct 
llm_build_starcoder2 : public llm_graph_context { + llm_build_starcoder2(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); - llm_build_mamba(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params), model(model) { ggml_tensor * cur; ggml_tensor * inpL; - // {n_embd, n_tokens} inpL = build_inp_embd(model.tok_embd); - ggml_tensor * state_copy = build_inp_s_copy(); + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + // norm cur = build_norm(inpL, - model.layers[il].attn_norm, NULL, - LLM_NORM_RMS, il); + model.layers[il].attn_norm, model.layers[il].attn_norm_b, + LLM_NORM, il); cb(cur, "attn_norm", il); - cur = build_mamba_layer(gf, cur, state_copy, ubatch, il); + // self-attention + { + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); 
+ } + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - // residual - cur = ggml_add(ctx0, cur, inpL); + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, model.layers[il].ffn_norm_b, + LLM_NORM, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL, + NULL, NULL, NULL, + model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, + NULL, + LLM_FFN_GELU, LLM_FFN_SEQ, il); + cb(cur, "ffn_out", il); + + cur = ggml_add(ctx0, cur, ffn_inp); cur = build_cvec(cur, il); cb(cur, "l_out", il); @@ -8884,10 +10445,11 @@ struct llm_build_mamba : public llm_graph_context { inpL = cur; } - // final rmsnorm - cur = build_norm(inpL, - model.output_norm, NULL, - LLM_NORM_RMS, -1); + cur = inpL; + + cur = build_norm(cur, + model.output_norm, model.output_norm_b, + LLM_NORM, -1); cb(cur, "result_norm", -1); res->t_embd = cur; @@ -8900,52 +10462,52 @@ struct llm_build_mamba : public llm_graph_context { ggml_build_forward_expand(gf, cur); } +}; + 
+struct llm_graph_context_mamba : public llm_graph_context { + llm_graph_context_mamba(const llm_graph_params & params) : llm_graph_context(params) {} - // TODO: split ggml_tensor * build_mamba_layer( - ggml_cgraph * gf, - ggml_tensor * cur, - ggml_tensor * state_copy, - const llama_ubatch & ubatch, - int il) const { - const auto * kv_state = static_cast(mstate); + llm_graph_input_rs * inp, + ggml_cgraph * gf, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il) { - const auto kv_head = kv_state->get_head(); + const auto * mctx_cur = inp->mctx; + + const auto kv_head = mctx_cur->get_head(); + + const auto & layer = model.layers[il]; const int64_t d_conv = hparams.ssm_d_conv; const int64_t d_inner = hparams.ssm_d_inner; const int64_t d_state = hparams.ssm_d_state; const int64_t dt_rank = hparams.ssm_dt_rank; + const int64_t n_head = d_inner; + const int64_t head_dim = 1; const int64_t n_seqs = ubatch.n_seqs; // Some variants of Mamba arch (e.g. FalconMamba do apply layer norm on B and Dt layers) const bool ssm_dt_b_c_rms = hparams.ssm_dt_b_c_rms; - // Use the same RMS norm as the final layer norm - const float norm_rms_eps = hparams.f_norm_rms_eps; const int64_t n_seq_tokens = ubatch.n_seq_tokens; GGML_ASSERT(n_seqs != 0); - GGML_ASSERT(ubatch.equal_seqs); + GGML_ASSERT(ubatch.equal_seqs()); GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs); - ggml_tensor * conv_states_all = kv_state->get_k_l(il); - ggml_tensor * ssm_states_all = kv_state->get_v_l(il); + ggml_tensor * conv_states_all = mctx_cur->get_r_l(il); + ggml_tensor * ssm_states_all = mctx_cur->get_s_l(il); - // (ab)using the KV cache to store the states - ggml_tensor * conv = build_recurrent_state( - gf, conv_states_all, state_copy, - hparams.n_embd_k_s(), n_seqs); + ggml_tensor * conv = build_rs(inp, gf, conv_states_all, hparams.n_embd_r(), n_seqs); conv = ggml_reshape_3d(ctx0, conv, d_conv - 1, d_inner, n_seqs); - ggml_tensor * ssm = build_recurrent_state( - gf, 
ssm_states_all, state_copy, - hparams.n_embd_v_s(), n_seqs); - ssm = ggml_reshape_3d(ctx0, ssm, d_state, d_inner, n_seqs); // {n_embd, n_tokens} => {n_embd, n_seq_tokens, n_seqs} cur = ggml_reshape_3d(ctx0, cur, cur->ne[0], n_seq_tokens, n_seqs); // {n_embd, 2*d_inner} @ {n_embd, n_seq_tokens, n_seqs} => {2*d_inner, n_seq_tokens, n_seqs} - ggml_tensor * xz = build_lora_mm(model.layers[il].ssm_in, cur); + ggml_tensor * xz = build_lora_mm(layer.ssm_in, cur); // split the above in two // => {d_inner, n_seq_tokens, n_seqs} ggml_tensor * x = ggml_view_3d(ctx0, xz, d_inner, xz->ne[1], xz->ne[2], xz->nb[1], xz->nb[2], 0); @@ -8974,10 +10536,10 @@ struct llm_build_mamba : public llm_graph_context { // then permute away the ne[0] dimension, // and then you're left with the resulting x tensor. // For simultaneous sequences, all sequences need to have the same length. - x = ggml_ssm_conv(ctx0, conv_x, model.layers[il].ssm_conv1d); + x = ggml_ssm_conv(ctx0, conv_x, layer.ssm_conv1d); // bias - x = ggml_add(ctx0, x, model.layers[il].ssm_conv1d_b); + x = ggml_add(ctx0, x, layer.ssm_conv1d_b); x = ggml_silu(ctx0, x); } @@ -8985,90 +10547,403 @@ struct llm_build_mamba : public llm_graph_context { // ssm { // {d_inner, dt_rank + 2*d_state} @ {d_inner, n_seq_tokens, n_seqs} => {dt_rank + 2*d_state, n_seq_tokens, n_seqs} - ggml_tensor * x_db = build_lora_mm(model.layers[il].ssm_x, x); + ggml_tensor * x_db = build_lora_mm(layer.ssm_x, x); // split ggml_tensor * dt = ggml_view_3d(ctx0, x_db, dt_rank, n_seq_tokens, n_seqs, x_db->nb[1], x_db->nb[2], 0); - ggml_tensor * B = ggml_view_3d(ctx0, x_db, d_state, n_seq_tokens, n_seqs, x_db->nb[1], x_db->nb[2], ggml_element_size(x_db)*dt_rank); - ggml_tensor * C = ggml_view_3d(ctx0, x_db, d_state, n_seq_tokens, n_seqs, x_db->nb[1], x_db->nb[2], ggml_element_size(x_db)*(dt_rank+d_state)); - - // Some Mamba variants (e.g. 
FalconMamba) apply RMS norm in B, C & Dt layers - if (ssm_dt_b_c_rms) { - dt = ggml_rms_norm(ctx0, dt, norm_rms_eps); - B = ggml_rms_norm(ctx0, B, norm_rms_eps); - C = ggml_rms_norm(ctx0, C, norm_rms_eps); + ggml_tensor * B = ggml_view_4d(ctx0, x_db, d_state, /* n_group */ 1, n_seq_tokens, n_seqs, d_state*x_db->nb[0], x_db->nb[1], x_db->nb[2], ggml_element_size(x_db)*dt_rank); + ggml_tensor * C = ggml_view_4d(ctx0, x_db, d_state, /* n_group */ 1, n_seq_tokens, n_seqs, d_state*x_db->nb[0], x_db->nb[1], x_db->nb[2], ggml_element_size(x_db)*(dt_rank+d_state)); + + // Some Mamba variants (e.g. FalconMamba, Jamba) apply RMS norm in B, C & Dt layers + if (ssm_dt_b_c_rms || (layer.ssm_dt_norm && layer.ssm_b_norm && layer.ssm_c_norm)) { + dt = build_norm(dt, layer.ssm_dt_norm, NULL, LLM_NORM_RMS, il); + B = build_norm(B, layer.ssm_b_norm, NULL, LLM_NORM_RMS, il); + C = build_norm(C, layer.ssm_c_norm, NULL, LLM_NORM_RMS, il); } // {dt_rank, d_inner} @ {dt_rank, n_seq_tokens, n_seqs} => {d_inner, n_seq_tokens, n_seqs} - dt = build_lora_mm(model.layers[il].ssm_dt, dt); - dt = ggml_add(ctx0, dt, model.layers[il].ssm_dt_b); + dt = build_lora_mm(layer.ssm_dt, dt); + dt = ggml_add(ctx0, dt, layer.ssm_dt_b); + + cur = x; + x = ggml_reshape_4d(ctx0, x, head_dim, n_head, n_seq_tokens, n_seqs); - // Custom operator to optimize the parallel associative scan - // as described in the Annex D of the Mamba paper. 
- // => {d_inner, n_seq_tokens, n_seqs} and {d_state, d_inner, n_seqs} - ggml_tensor * y_ssm = ggml_ssm_scan(ctx0, ssm, x, dt, model.layers[il].ssm_a, B, C); + ggml_tensor * A = layer.ssm_a; + + // use the states and the indices provided by build_recurrent_state + // (this is necessary in order to properly use the states before they are overwritten, + // while avoiding to make unnecessary copies of the states) + auto get_ssm_rows = [&](ggml_context * ctx, ggml_tensor * states, ggml_tensor * ids) { + ggml_tensor * ssm = ggml_reshape_4d(ctx, states, d_state, head_dim, n_head, mctx_cur->get_size()); + + // Custom operator to optimize the parallel associative scan + // as described in the Annex D of the Mamba paper. + // => {d_inner, n_seq_tokens, n_seqs} and {d_state, d_inner, n_seqs} + return ggml_ssm_scan(ctx, ssm, x, dt, A, B, C, ids); + }; + + ggml_tensor * y_ssm = build_rs(inp, gf, ssm_states_all, hparams.n_embd_s(), ubatch.n_seqs, get_ssm_rows); // store last states ggml_build_forward_expand(gf, ggml_cpy(ctx0, - ggml_view_1d(ctx0, y_ssm, d_state*d_inner*n_seqs, x->nb[3]), + ggml_view_1d(ctx0, y_ssm, d_state*d_inner*n_seqs, x->nb[3]*x->ne[3]), ggml_view_1d(ctx0, ssm_states_all, d_state*d_inner*n_seqs, kv_head*d_state*d_inner*ggml_element_size(ssm_states_all)))); - ggml_tensor * y = ggml_view_3d(ctx0, y_ssm, d_inner, n_seq_tokens, n_seqs, x->nb[1], x->nb[2], 0); + ggml_tensor * y = ggml_view_3d(ctx0, y_ssm, d_inner, n_seq_tokens, n_seqs, x->nb[2], x->nb[3], 0); // TODO: skip computing output earlier for unused tokens - // {d_inner, n_seq_tokens, n_seqs} * {d_inner} => {d_inner, n_seq_tokens, n_seqs} - y = ggml_add(ctx0, y, ggml_mul(ctx0, x, model.layers[il].ssm_d)); - y = ggml_mul(ctx0, y, ggml_silu(ctx0, ggml_cont(ctx0, z))); + y = ggml_add(ctx0, y, ggml_mul(ctx0, cur, layer.ssm_d)); + y = ggml_swiglu_split(ctx0, ggml_cont(ctx0, z), y); // {d_inner, n_embd} @ {d_inner, n_seq_tokens, n_seqs} => {n_embd, n_seq_tokens, n_seqs} - cur = 
build_lora_mm(model.layers[il].ssm_out, y); + cur = build_lora_mm(layer.ssm_out, y); } // {n_embd, n_seq_tokens, n_seqs} => {n_embd, n_tokens} cur = ggml_reshape_2d(ctx0, cur, cur->ne[0], n_seq_tokens * n_seqs); - //cb(cur, "mamba_out", il); return cur; } -}; -struct llm_build_command_r : public llm_graph_context { - llm_build_command_r(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { - const int64_t n_embd_head = hparams.n_embd_head_v; + ggml_tensor * build_mamba2_layer( + llm_graph_input_rs * inp, + ggml_cgraph * gf, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il) const { - GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + const auto * mctx_cur = inp->mctx; - const float f_logit_scale = hparams.f_logit_scale; + const auto kv_head = mctx_cur->get_head(); - ggml_tensor * cur; - ggml_tensor * inpL; + const int64_t d_conv = hparams.ssm_d_conv; + const int64_t d_inner = hparams.ssm_d_inner; + const int64_t d_state = hparams.ssm_d_state; + const int64_t n_head = hparams.ssm_dt_rank; + const int64_t head_dim = d_inner / n_head; + const int64_t n_group = hparams.ssm_n_group; + const int64_t n_seqs = ubatch.n_seqs; - inpL = build_inp_embd(model.tok_embd); + const int64_t n_seq_tokens = ubatch.n_seq_tokens; - // inp_pos - contains the positions - ggml_tensor * inp_pos = build_inp_pos(); + GGML_ASSERT(n_seqs != 0); + GGML_ASSERT(ubatch.equal_seqs()); + GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs); - auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * conv_states_all = mctx_cur->get_r_l(il); + ggml_tensor * ssm_states_all = mctx_cur->get_s_l(il); - for (int il = 0; il < n_layer; ++il) { + ggml_tensor * conv = build_rs(inp, gf, conv_states_all, hparams.n_embd_r(), n_seqs); + conv = ggml_reshape_3d(ctx0, conv, d_conv - 1, d_inner + 2*n_group*d_state, n_seqs); - // norm - cur = build_norm(inpL, - model.layers[il].attn_norm, NULL, - LLM_NORM, il); - 
cb(cur, "attn_norm", il); - ggml_tensor * ffn_inp = cur; + // {n_embd, n_tokens} => {n_embd, n_seq_tokens, n_seqs} + cur = ggml_reshape_3d(ctx0, cur, cur->ne[0], n_seq_tokens, n_seqs); - // self-attention - { - // compute Q and K and RoPE them - ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); - cb(Qcur, "Qcur", il); - if (model.layers[il].bq) { - Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); - cb(Qcur, "Qcur", il); - } + // d_in_proj = 2 * self.d_inner + 2 * self.ngroups * self.d_state + self.nheads + + // {n_embd, d_in_proj} @ {n_embd, n_seq_tokens, n_seqs} => {d_in_proj, n_seq_tokens, n_seqs} + ggml_tensor * zxBCdt = build_lora_mm(model.layers[il].ssm_in, cur); + + // split the above in three + ggml_tensor * z = ggml_view_4d(ctx0, zxBCdt, head_dim, n_head, n_seq_tokens, n_seqs, head_dim*zxBCdt->nb[0], zxBCdt->nb[1], zxBCdt->nb[2], 0); + ggml_tensor * xBC = ggml_view_3d(ctx0, zxBCdt, d_inner + 2*n_group*d_state, n_seq_tokens, n_seqs, zxBCdt->nb[1], zxBCdt->nb[2], d_inner*ggml_element_size(zxBCdt)); + ggml_tensor * dt = ggml_view_3d(ctx0, zxBCdt, n_head, n_seq_tokens, n_seqs, zxBCdt->nb[1], zxBCdt->nb[2], (2*d_inner + 2*n_group*d_state)*ggml_element_size(zxBCdt)); + + // conv + { + // => {d_conv - 1 + n_seq_tokens, d_inner + 2*n_group*d_state, n_seqs} + ggml_tensor * conv_x = ggml_concat(ctx0, conv, ggml_transpose(ctx0, xBC), 0); + + // copy last (d_conv - 1) columns back into the state cache + ggml_tensor * last_conv = ggml_view_3d(ctx0, conv_x, d_conv - 1, d_inner + 2*n_group*d_state, n_seqs, conv_x->nb[1], conv_x->nb[2], n_seq_tokens*(conv_x->nb[0])); + + ggml_build_forward_expand(gf, + ggml_cpy(ctx0, last_conv, + ggml_view_1d(ctx0, conv_states_all, + (d_conv - 1)*(d_inner + 2*n_group*d_state)*(n_seqs), + kv_head*(d_conv - 1)*(d_inner + 2*n_group*d_state)*ggml_element_size(conv_states_all)))); + + // 1D convolution + // The equivalent is to make a self-overlapping view of conv_x + // over d_conv columns at each stride in the 3rd dimension, + // 
then element-wise multiply that with the conv1d weight, + // then sum the elements of each row, + // (the last two steps are a dot product over rows (also doable with mul_mat)) + // then permute away the ne[0] dimension, + // and then you're left with the resulting x tensor. + // For simultaneous sequences, all sequences need to have the same length. + xBC = ggml_ssm_conv(ctx0, conv_x, model.layers[il].ssm_conv1d); + + // bias + xBC = ggml_add(ctx0, xBC, model.layers[il].ssm_conv1d_b); + + xBC = ggml_silu(ctx0, xBC); + } + + // ssm + { + // These correspond to V K Q in SSM/attention duality + ggml_tensor * x = ggml_view_4d(ctx0, xBC, head_dim, n_head, n_seq_tokens, n_seqs, head_dim*xBC->nb[0], xBC->nb[1], xBC->nb[2], 0); + ggml_tensor * B = ggml_view_4d(ctx0, xBC, d_state, n_group, n_seq_tokens, n_seqs, d_state*xBC->nb[0], xBC->nb[1], xBC->nb[2], d_inner*ggml_element_size(xBC)); + ggml_tensor * C = ggml_view_4d(ctx0, xBC, d_state, n_group, n_seq_tokens, n_seqs, d_state*xBC->nb[0], xBC->nb[1], xBC->nb[2], (d_inner + n_group*d_state)*ggml_element_size(xBC)); + + // {n_head, n_seq_tokens, n_seqs} + dt = ggml_add(ctx0, ggml_cont(ctx0, dt), model.layers[il].ssm_dt_b); + + ggml_tensor * A = model.layers[il].ssm_a; + + // use the states and the indices provided by build_recurrent_state + // (this is necessary in order to properly use the states before they are overwritten, + // while avoiding to make unnecessary copies of the states) + auto get_ssm_rows = [&](ggml_context * ctx, ggml_tensor * states, ggml_tensor * ids) { + ggml_tensor * ssm = ggml_reshape_4d(ctx, states, d_state, head_dim, n_head, mctx_cur->get_size()); + + // TODO: use semistructured matrices to implement state-space duality + // => {d_inner, n_seq_tokens, n_seqs} and {d_state, d_inner, n_seqs} + return ggml_ssm_scan(ctx, ssm, x, dt, A, B, C, ids); + }; + + ggml_tensor * y_ssm = build_rs(inp, gf, ssm_states_all, hparams.n_embd_s(), ubatch.n_seqs, get_ssm_rows); + + // store last states + 
ggml_build_forward_expand(gf, + ggml_cpy(ctx0, + ggml_view_1d(ctx0, y_ssm, d_state*d_inner*n_seqs, ggml_nelements(x)*x->nb[0]), + ggml_view_1d(ctx0, ssm_states_all, d_state*d_inner*n_seqs, kv_head*d_state*d_inner*ggml_element_size(ssm_states_all)))); + + ggml_tensor * y = ggml_view_4d(ctx0, y_ssm, head_dim, n_head, n_seq_tokens, n_seqs, x->nb[1], n_head*x->nb[1], n_seq_tokens*n_head*x->nb[1], 0); + + // TODO: skip computing output earlier for unused tokens + + y = ggml_add(ctx0, y, ggml_mul(ctx0, x, model.layers[il].ssm_d)); + y = ggml_swiglu_split(ctx0, ggml_cont(ctx0, z), y); + + // grouped RMS norm + if (model.layers[il].ssm_norm) { + y = ggml_reshape_4d(ctx0, y, d_inner / n_group, n_group, n_seq_tokens, n_seqs); + y = build_norm(y, model.layers[il].ssm_norm, NULL, LLM_NORM_RMS, il); + } + + y = ggml_reshape_3d(ctx0, y, d_inner, n_seq_tokens, n_seqs); + + // {d_inner, n_embd} @ {d_inner, n_seq_tokens, n_seqs} => {n_embd, n_seq_tokens, n_seqs} + cur = build_lora_mm(model.layers[il].ssm_out, y); + } + + // {n_embd, n_seq_tokens, n_seqs} => {n_embd, n_tokens} + cur = ggml_reshape_2d(ctx0, cur, cur->ne[0], n_seq_tokens * n_seqs); + cb(cur, "mamba_out", il); + + return cur; + } +}; + +struct llm_build_mamba : public llm_graph_context_mamba { + llm_build_mamba(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context_mamba(params) { + ggml_tensor * cur; + ggml_tensor * inpL; + + // {n_embd, n_tokens} + inpL = build_inp_embd(model.tok_embd); + + auto * rs_inp = build_rs_inp(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + // norm + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + if (model.arch == LLM_ARCH_MAMBA2) { + cur = build_mamba2_layer(rs_inp, gf, cur, model, ubatch, il); + } else { + cur = build_mamba_layer(rs_inp, gf, cur, model, ubatch, il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = 
ggml_get_rows(ctx0, cur, inp_out_ids); + inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + } + + // residual + cur = ggml_add(ctx0, cur, inpL); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + // final rmsnorm + cur = build_norm(inpL, model.output_norm, NULL, LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } + +}; + +struct llm_build_jamba : public llm_graph_context_mamba { + llm_build_jamba(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context_mamba(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + ggml_tensor * cur; + ggml_tensor * inpL; + + // {n_embd, n_tokens} + inpL = build_inp_embd(model.tok_embd); + + auto * inp_hybrid = build_inp_mem_hybrid(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + const int64_t n_head_kv = hparams.n_head_kv(il); + + cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + if (n_head_kv == 0) { + cur = build_mamba_layer(inp_hybrid->get_recr(), gf, cur, model, ubatch, il); + } else { + // Attention + + struct ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + struct ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + struct ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + // No RoPE :) + cur = build_attn(inp_hybrid->get_attn(), gf, 
model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f/sqrtf(float(n_embd_head)), il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + } + + // residual + struct ggml_tensor * ffn_inp = ggml_add(ctx0, inpL, cur); + cb(cur, "ffn_inp", il); + + cur = build_norm(ffn_inp, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + // feed-forward network + if (model.layers[il].ffn_gate_inp == nullptr) { + // FFN + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + } else { + // MoE branch + cur = build_moe_ffn(cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + nullptr, + n_expert, n_expert_used, + LLM_FFN_SILU, false, + false, 0.0, + LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, + il); + cb(cur, "ffn_moe_out", il); + } + + // residual + cur = ggml_add(ctx0, ffn_inp, cur); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + // final rmsnorm + cur = build_norm(inpL, model.output_norm, NULL, LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_command_r : public llm_graph_context { + llm_build_command_r(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + + const float f_logit_scale = hparams.f_logit_scale; + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + 
// inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + // norm + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM, il); + cb(cur, "attn_norm", il); + + ggml_tensor * ffn_inp = cur; + + // self-attention + { + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); cb(Kcur, "Kcur", il); @@ -9125,9 +11000,7 @@ struct llm_build_command_r : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids); @@ -9198,6 +11071,8 @@ struct llm_build_cohere2_iswa : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified_iswa(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { const bool is_swa = hparams.is_swa(il); @@ -9260,9 +11135,7 @@ struct llm_build_cohere2_iswa : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids); @@ -9333,6 +11206,8 @@ struct llm_build_olmo : public llm_graph_context { auto * 
inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -9391,9 +11266,7 @@ struct llm_build_olmo : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -9461,6 +11334,8 @@ struct llm_build_olmo2 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -9511,18 +11386,16 @@ struct llm_build_olmo2 : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + cur = build_norm(cur, model.layers[il].attn_post_norm, NULL, LLM_NORM_RMS, il); cb(cur, "attn_post_norm", il); - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); - } - ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); cb(ffn_inp, "ffn_inp", il); @@ -9590,6 +11463,8 @@ struct llm_build_olmoe : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -9644,9 +11519,7 @@ struct llm_build_olmoe : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - 
ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -9716,6 +11589,8 @@ struct llm_build_openelm : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { const int64_t n_head = hparams.n_head(il); const int64_t n_head_kv = hparams.n_head_kv(il); @@ -9737,10 +11612,10 @@ struct llm_build_openelm : public llm_graph_context { cur = ggml_reshape_3d(ctx0, cur, n_embd_head_k, n_head_qkv, n_tokens); - ggml_tensor * Qcur = ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, cur->nb[1], cur->nb[2], 0)); + ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, cur->nb[1], cur->nb[2], 0); cb(Qcur, "Qcur", il); - ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, cur->nb[1], cur->nb[2], cur->nb[1]*n_head)); + ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, cur->nb[1], cur->nb[2], cur->nb[1]*n_head); cb(Kcur, "Kcur", il); ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, cur->nb[1], cur->nb[2], cur->nb[1]*(n_head+n_head_kv))); @@ -9777,11 +11652,9 @@ struct llm_build_openelm : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { residual = ggml_get_rows(ctx0, residual, inp_out_ids); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); + cur = ggml_get_rows(ctx0, cur, inp_out_ids); } ggml_tensor * ffn_inp = ggml_add(ctx0, residual, cur); @@ -9847,6 +11720,8 @@ struct llm_build_gptneox : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor 
* inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { cur = build_norm(inpL, model.layers[il].attn_norm, @@ -9862,12 +11737,10 @@ struct llm_build_gptneox : public llm_graph_context { cur = ggml_add(ctx0, cur, model.layers[il].bqkv); cb(cur, "bqkv", il); - ggml_tensor * Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - ggml_tensor * Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + ggml_tensor * Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + ggml_tensor * Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); Qcur = ggml_rope_ext( @@ -9891,9 +11764,7 @@ struct llm_build_gptneox : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -9995,6 +11866,8 @@ struct llm_build_arctic : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -10041,9 +11914,7 @@ struct llm_build_arctic : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il 
== n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -10135,6 +12006,8 @@ struct llm_build_deepseek : public llm_graph_context { const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -10196,14 +12069,11 @@ struct llm_build_deepseek : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } - ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); cb(ffn_inp, "ffn_inp", il); @@ -10311,6 +12181,8 @@ struct llm_build_deepseek2 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -10460,9 +12332,7 @@ struct llm_build_deepseek2 : public llm_graph_context { } } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -10558,6 +12428,8 @@ struct llm_build_bitnet : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -10640,9 +12512,7 @@ struct llm_build_bitnet : public llm_graph_context { cb(cur, "attn_o_out", il); } - if (il 
== n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -10717,6 +12587,8 @@ struct llm_build_t5_enc : public llm_graph_context { auto * inp_attn = build_attn_inp_no_cache(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -10750,9 +12622,7 @@ struct llm_build_t5_enc : public llm_graph_context { cb(cur, "kqv_out", il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -10823,6 +12693,8 @@ struct llm_build_t5_dec : public llm_graph_context { auto * inp_attn_self = build_attn_inp_kv_unified(); auto * inp_attn_cross = build_attn_inp_cross(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -10914,11 +12786,8 @@ struct llm_build_t5_dec : public llm_graph_context { //cb(cur, "kqv_out", il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); inpCA = ggml_get_rows(ctx0, inpCA, inp_out_ids); } @@ -10988,6 +12857,8 @@ struct llm_build_jais : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { cur = build_norm(inpL, model.layers[il].attn_norm, @@ -11020,9 +12891,7 @@ struct llm_build_jais : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/float(n_embd_head), il); } - if (il == 
n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); } @@ -11086,6 +12955,8 @@ struct llm_build_chatglm : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -11114,6 +12985,8 @@ struct llm_build_chatglm : public llm_graph_context { if (model.layers[il].bv) { Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); } + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); } else { cur = build_lora_mm(model.layers[il].wqkv, cur); cb(cur, "wqkv", il); @@ -11121,13 +12994,11 @@ struct llm_build_chatglm : public llm_graph_context { cur = ggml_add(ctx0, cur, model.layers[il].bqkv); cb(cur, "bqkv", il); } - Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); } - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); //printf("freq_base: %f freq_scale: %f ext_factor: %f attn_factor: %f\n", freq_base, freq_scale, ext_factor, attn_factor); @@ -11152,9 +13023,7 @@ struct 
llm_build_chatglm : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -11219,6 +13088,8 @@ struct llm_build_glm4 : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -11248,6 +13119,8 @@ struct llm_build_glm4 : public llm_graph_context { if (model.layers[il].bv) { Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); } + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); } else { cur = build_lora_mm(model.layers[il].wqkv, cur); cb(cur, "wqkv", il); @@ -11255,13 +13128,11 @@ struct llm_build_glm4 : public llm_graph_context { cur = ggml_add(ctx0, cur, model.layers[il].bqkv); cb(cur, "bqkv", il); } - Qcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd, n_tokens, cur->nb[1], 0*sizeof(float)*(n_embd))); - Kcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd))); + Qcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 0*sizeof(float)*(n_embd)); + Kcur = ggml_view_3d(ctx0, cur, n_embd_head, n_head_kv, n_tokens, n_embd_head*sizeof(float), cur->nb[1], 1*sizeof(float)*(n_embd)); Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, n_embd_gqa, n_tokens, cur->nb[1], 1*sizeof(float)*(n_embd + n_embd_gqa))); } - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); Qcur = ggml_rope_ext( @@ -11285,9 
+13156,7 @@ struct llm_build_glm4 : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -11370,6 +13239,8 @@ struct llm_build_nemotron : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -11429,9 +13300,7 @@ struct llm_build_nemotron : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -11499,6 +13368,8 @@ struct llm_build_exaone : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -11560,9 +13431,7 @@ struct llm_build_exaone : public llm_graph_context { Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -11649,13 +13518,13 @@ struct llm_build_rwkv6_base : public llm_graph_context { } ggml_tensor * build_rwkv6_time_mix( + llm_graph_input_rs * inp, ggml_cgraph * gf, ggml_tensor * cur, ggml_tensor * x_prev, - ggml_tensor * state_copy, const llama_ubatch & ubatch, int il) const { - 
const auto * kv_state = static_cast(mstate); + const auto * mctx_cur = static_cast(mctx); const auto n_tokens = ubatch.n_tokens; const auto n_seqs = ubatch.n_seqs; @@ -11665,7 +13534,7 @@ struct llm_build_rwkv6_base : public llm_graph_context { const auto n_head = n_embd / head_size; const auto n_head_kv = hparams.n_head_kv(il); - const auto kv_head = kv_state->get_head(); + const auto kv_head = mctx_cur->get_head(); const auto & layer = model.layers[il]; @@ -11776,9 +13645,9 @@ struct llm_build_rwkv6_base : public llm_graph_context { k = ggml_sub(ctx0, k, ggml_mul(ctx0, k, w)); } - ggml_tensor * wkv_state = build_recurrent_state( - gf, kv_state->get_v_l(il), state_copy, - hparams.n_embd_v_s(), n_seqs); + ggml_tensor * wkv_state = build_rs( + inp, gf, mctx_cur->get_s_l(il), + hparams.n_embd_s(), n_seqs); ggml_tensor * wkv_output; if (is_qrwkv) { @@ -11796,9 +13665,9 @@ struct llm_build_rwkv6_base : public llm_graph_context { wkv_state, ggml_view_1d( ctx0, - kv_state->get_v_l(il), - hparams.n_embd_v_s() * n_seqs, - hparams.n_embd_v_s() * kv_head * ggml_element_size(kv_state->get_v_l(il)) + mctx_cur->get_s_l(il), + hparams.n_embd_s() * n_seqs, + hparams.n_embd_s() * kv_head * ggml_element_size(mctx_cur->get_s_l(il)) ) ) ); @@ -11832,19 +13701,19 @@ struct llm_build_rwkv6 : public llm_build_rwkv6_base { inpL = build_inp_embd(model.tok_embd); inpL = build_norm(inpL, model.tok_norm, model.tok_norm_b, LLM_NORM, -1); - ggml_tensor * state_copy = build_inp_s_copy(); + auto * rs_inp = build_rs_inp(); const auto n_embd = hparams.n_embd; const auto n_seq_tokens = ubatch.n_seq_tokens; const auto n_seqs = ubatch.n_seqs; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { const llama_layer * layer = &model.layers[il]; inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs); - ggml_tensor * token_shift = build_rwkv_token_shift_load( - gf, state_copy, ubatch, il - ); + ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, 
gf, ubatch, il); ggml_tensor * att_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], 0); ggml_tensor * ffn_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], n_embd * ggml_element_size(token_shift)); @@ -11859,7 +13728,7 @@ struct llm_build_rwkv6 : public llm_build_rwkv6_base { 1 ); - cur = build_rwkv6_time_mix(gf, att_norm, x_prev, state_copy, ubatch, il); + cur = build_rwkv6_time_mix(rs_inp, gf, att_norm, x_prev, ubatch, il); ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL); cb(ffn_inp, "ffn_inp", il); @@ -11881,13 +13750,16 @@ struct llm_build_rwkv6 : public llm_build_rwkv6_base { ); ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il)); - if (il == n_layer - 1) { - // skip computing output for unused tokens - struct ggml_tensor * inp_out_ids = build_inp_out_ids(); - ffn_inp = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens), inp_out_ids); - ffn_norm = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens), inp_out_ids); - x_prev = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens), inp_out_ids); - cur = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, cur, n_embd, n_tokens), inp_out_ids); + ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens); + ffn_norm = ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens); + x_prev = ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens); + cur = ggml_reshape_2d(ctx0, cur, n_embd, n_tokens); + + if (il == n_layer - 1 && inp_out_ids) { + ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids); + ffn_norm = ggml_get_rows(ctx0, ffn_norm, inp_out_ids); + x_prev = ggml_get_rows(ctx0, x_prev, inp_out_ids); + cur = ggml_get_rows(ctx0, cur, inp_out_ids); } cur = build_rwkv6_channel_mix(layer, ffn_norm, x_prev, LLM_ARCH_RWKV6); @@ -11922,26 +13794,26 @@ struct llm_build_rwkv6 : public llm_build_rwkv6_base { // ref: 
https://huggingface.co/recursal/QRWKV6-32B-Instruct-Preview-v0.1/blob/main/modeling_rwkv6qwen2.py struct llm_build_rwkv6qwen2 : public llm_build_rwkv6_base { llm_build_rwkv6qwen2(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_build_rwkv6_base(model, params) { - GGML_ASSERT(n_embd == hparams.n_embd_k_s()); + GGML_ASSERT(n_embd == hparams.n_embd_r()); ggml_tensor * cur; ggml_tensor * inpL; inpL = build_inp_embd(model.tok_embd); - ggml_tensor * state_copy = build_inp_s_copy(); + auto * rs_inp = build_rs_inp(); const auto n_embd = hparams.n_embd; const auto n_seq_tokens = ubatch.n_seq_tokens; const auto n_seqs = ubatch.n_seqs; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { const llama_layer * layer = &model.layers[il]; inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs); - ggml_tensor * token_shift = build_rwkv_token_shift_load( - gf, state_copy, ubatch, il - ); + ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, gf, ubatch, il); ggml_tensor * att_norm = build_norm(inpL, layer->attn_norm, layer->attn_norm_b, LLM_NORM_RMS, il); cb(att_norm, "attn_norm", il); @@ -11953,7 +13825,7 @@ struct llm_build_rwkv6qwen2 : public llm_build_rwkv6_base { 1 ); - cur = build_rwkv6_time_mix(gf, att_norm, x_prev, state_copy, ubatch, il); + cur = build_rwkv6_time_mix(rs_inp, gf, att_norm, x_prev, ubatch, il); token_shift = ggml_view_3d(ctx0, att_norm, n_embd, 1, n_seqs, att_norm->nb[1], att_norm->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(att_norm)); ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il)); @@ -11961,11 +13833,12 @@ struct llm_build_rwkv6qwen2 : public llm_build_rwkv6_base { ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL); cb(ffn_inp, "ffn_inp", il); - if (il == n_layer - 1) { - // skip computing output for unused tokens - struct ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, 
cur, n_embd, n_tokens), inp_out_ids); - ffn_inp = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens), inp_out_ids); + cur = ggml_reshape_2d(ctx0, cur, n_embd, n_tokens); + ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens); + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids); } // feed-forward network @@ -12041,14 +13914,14 @@ struct llm_build_rwkv7_base : public llm_graph_context { } ggml_tensor * build_rwkv7_time_mix( + llm_graph_input_rs * inp, ggml_cgraph * gf, ggml_tensor * cur, ggml_tensor * x_prev, - ggml_tensor * state_copy, ggml_tensor *& first_layer_value, const llama_ubatch & ubatch, int il) const { - const auto * kv_state = static_cast(mstate); + const auto * mctx_cur = static_cast(mctx); const auto n_tokens = ubatch.n_tokens; const auto n_seqs = ubatch.n_seqs; @@ -12057,7 +13930,7 @@ struct llm_build_rwkv7_base : public llm_graph_context { const auto head_count = n_embd / head_size; const auto n_seq_tokens = ubatch.n_seq_tokens; - const auto kv_head = kv_state->get_head(); + const auto kv_head = mctx_cur->get_head(); const auto & layer = model.layers[il]; @@ -12127,9 +14000,9 @@ struct llm_build_rwkv7_base : public llm_graph_context { v = ggml_reshape_3d(ctx0, v, head_size, head_count, n_tokens); a = ggml_reshape_3d(ctx0, a, head_size, head_count, n_tokens); - ggml_tensor * wkv_state = build_recurrent_state( - gf, kv_state->get_v_l(il), state_copy, - hparams.n_embd_v_s(), n_seqs); + ggml_tensor * wkv_state = build_rs( + inp, gf, mctx_cur->get_s_l(il), + hparams.n_embd_s(), n_seqs); ggml_tensor * wkv_output = ggml_rwkv_wkv7(ctx0, r, w, k, v, ggml_neg(ctx0, kk), ggml_mul(ctx0, kk, a), wkv_state); cur = ggml_view_1d(ctx0, wkv_output, n_embd * n_tokens, 0); @@ -12142,9 +14015,9 @@ struct llm_build_rwkv7_base : public llm_graph_context { wkv_state, ggml_view_1d( ctx0, - kv_state->get_v_l(il), - hparams.n_embd_v_s() * n_seqs, - 
hparams.n_embd_v_s() * kv_head * ggml_element_size(kv_state->get_v_l(il)) + mctx_cur->get_s_l(il), + hparams.n_embd_s() * n_seqs, + hparams.n_embd_s() * kv_head * ggml_element_size(mctx_cur->get_s_l(il)) ) ) ); @@ -12185,19 +14058,19 @@ struct llm_build_rwkv7 : public llm_build_rwkv7_base { inpL = build_inp_embd(model.tok_embd); inpL = build_norm(inpL, model.tok_norm, model.tok_norm_b, LLM_NORM, -1); - ggml_tensor * state_copy = build_inp_s_copy(); + auto * rs_inp = build_rs_inp(); const auto n_embd = hparams.n_embd; const auto n_seq_tokens = ubatch.n_seq_tokens; const auto n_seqs = ubatch.n_seqs; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { const llama_layer * layer = &model.layers[il]; inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs); - ggml_tensor * token_shift = build_rwkv_token_shift_load( - gf, state_copy, ubatch, il - ); + ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, gf, ubatch, il); ggml_tensor * att_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], 0); ggml_tensor * ffn_shift = ggml_view_3d(ctx0, token_shift, n_embd, 1, n_seqs, token_shift->nb[1], token_shift->nb[2], n_embd * ggml_element_size(token_shift)); @@ -12212,7 +14085,7 @@ struct llm_build_rwkv7 : public llm_build_rwkv7_base { 1 ); - cur = build_rwkv7_time_mix(gf, att_norm, x_prev, state_copy, v_first, ubatch, il); + cur = build_rwkv7_time_mix(rs_inp, gf, att_norm, x_prev, v_first, ubatch, il); ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL); cb(ffn_inp, "ffn_inp", il); @@ -12234,12 +14107,14 @@ struct llm_build_rwkv7 : public llm_build_rwkv7_base { ); ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il)); - if (il == n_layer - 1) { - // skip computing output for unused tokens - struct ggml_tensor * inp_out_ids = build_inp_out_ids(); - ffn_inp = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens), inp_out_ids); - 
ffn_norm = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens), inp_out_ids); - x_prev = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens), inp_out_ids); + ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens); + ffn_norm = ggml_reshape_2d(ctx0, ffn_norm, n_embd, n_tokens); + x_prev = ggml_reshape_2d(ctx0, x_prev, n_embd, n_tokens); + + if (il == n_layer - 1 && inp_out_ids) { + ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids); + ffn_norm = ggml_get_rows(ctx0, ffn_norm, inp_out_ids); + x_prev = ggml_get_rows(ctx0, x_prev, inp_out_ids); } cur = build_rwkv7_channel_mix(layer, ffn_norm, x_prev, LLM_ARCH_RWKV7); @@ -12270,7 +14145,7 @@ struct llm_build_rwkv7 : public llm_build_rwkv7_base { struct llm_build_arwkv7 : public llm_build_rwkv7_base { llm_build_arwkv7(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_build_rwkv7_base(model, params) { - GGML_ASSERT(n_embd == hparams.n_embd_k_s()); + GGML_ASSERT(n_embd == hparams.n_embd_r()); ggml_tensor * cur; ggml_tensor * inpL; @@ -12278,19 +14153,19 @@ struct llm_build_arwkv7 : public llm_build_rwkv7_base { inpL = build_inp_embd(model.tok_embd); - ggml_tensor * state_copy = build_inp_s_copy(); + auto * rs_inp = build_rs_inp(); const auto n_embd = hparams.n_embd; const auto n_seq_tokens = ubatch.n_seq_tokens; const auto n_seqs = ubatch.n_seqs; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { const llama_layer * layer = &model.layers[il]; inpL = ggml_reshape_3d(ctx0, inpL, n_embd, n_seq_tokens, n_seqs); - ggml_tensor * token_shift = build_rwkv_token_shift_load( - gf, state_copy, ubatch, il - ); + ggml_tensor * token_shift = build_rwkv_token_shift_load(rs_inp, gf, ubatch, il); ggml_tensor * att_norm = build_norm(inpL, layer->attn_norm, layer->attn_norm_b, LLM_NORM_RMS, il); cb(att_norm, "attn_norm", il); @@ -12302,7 +14177,7 @@ struct llm_build_arwkv7 : public llm_build_rwkv7_base { 1 ); - cur = 
build_rwkv7_time_mix(gf, att_norm, x_prev, state_copy, v_first, ubatch, il); + cur = build_rwkv7_time_mix(rs_inp, gf, att_norm, x_prev, v_first, ubatch, il); token_shift = ggml_view_3d(ctx0, att_norm, n_embd, 1, n_seqs, att_norm->nb[1], att_norm->nb[2], (n_seq_tokens-1)*n_embd*ggml_element_size(att_norm)); ggml_build_forward_expand(gf, build_rwkv_token_shift_store(token_shift, ubatch, il)); @@ -12310,11 +14185,12 @@ struct llm_build_arwkv7 : public llm_build_rwkv7_base { ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpL); cb(ffn_inp, "ffn_inp", il); - if (il == n_layer - 1) { - // skip computing output for unused tokens - struct ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, cur, n_embd, n_tokens), inp_out_ids); - ffn_inp = ggml_get_rows(ctx0, ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens), inp_out_ids); + cur = ggml_reshape_2d(ctx0, cur, n_embd, n_tokens); + ffn_inp = ggml_reshape_2d(ctx0, ffn_inp, n_embd, n_tokens); + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids); } // feed-forward network @@ -12355,13 +14231,11 @@ struct llm_build_arwkv7 : public llm_build_rwkv7_base { } }; - struct llm_build_granite : public llm_graph_context { llm_build_granite( const llama_model & model, const llm_graph_params & params, - ggml_cgraph * gf, - const bool use_rope = true) + ggml_cgraph * gf) : llm_graph_context(params) { const int64_t n_embd_head = hparams.n_embd_head_v; @@ -12376,13 +14250,14 @@ struct llm_build_granite : public llm_graph_context { // inp_pos - built only if rope enabled ggml_tensor * inp_pos = nullptr; - if (use_rope) { + if (hparams.rope_finetuned) { inp_pos = build_inp_pos(); } auto * inp_attn = build_attn_inp_kv_unified(); - const float kq_scale = hparams.f_attention_scale == 0.0f ? 
1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ -12393,130 +14268,17 @@ struct llm_build_granite : public llm_graph_context { cb(cur, "attn_norm", il); // self-attention - { - // compute Q and K and (optionally) RoPE them - ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); - cb(Qcur, "Qcur", il); - if (model.layers[il].bq) { - Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); - cb(Qcur, "Qcur", il); - } - - ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); - cb(Kcur, "Kcur", il); - if (model.layers[il].bk) { - Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); - cb(Kcur, "Kcur", il); - } - - ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); - cb(Vcur, "Vcur", il); - if (model.layers[il].bv) { - Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); - cb(Vcur, "Vcur", il); - } - - Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); - Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); - - if (use_rope) { - ggml_tensor * rope_factors = model.get_rope_factors(cparams, il); - Qcur = ggml_rope_ext( - ctx0, Qcur, inp_pos, rope_factors, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); - - Kcur = ggml_rope_ext( - ctx0, Kcur, inp_pos, rope_factors, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); - } - - cb(Qcur, "Qcur", il); - cb(Kcur, "Kcur", il); - cb(Vcur, "Vcur", il); - - cur = build_attn(inp_attn, gf, - model.layers[il].wo, model.layers[il].bo, - Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); - cb(cur, "attn_out", il); - } + cur = build_attention_layer( + gf, cur, inp_pos, inp_attn, + model, n_embd_head, il); - if (il == n_layer - 1) { - // skip computing output for unused tokens - 
ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } - // For Granite architectures - scale residual - cur = ggml_scale(ctx0, cur, hparams.f_residual_scale); - ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); - cb(ffn_inp, "ffn_inp", il); - - // feed-forward network (non-MoE) - if (model.layers[il].ffn_gate_inp == nullptr) { - - cur = build_norm(ffn_inp, - model.layers[il].ffn_norm, NULL, - LLM_NORM_RMS, il); - cb(cur, "ffn_norm", il); - - cur = build_ffn(cur, - model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL, - model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL, - model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, - NULL, - LLM_FFN_SILU, LLM_FFN_PAR, il); - cb(cur, "ffn_out", il); - - } else { - // MoE branch - cur = build_norm(ffn_inp, - model.layers[il].ffn_norm, NULL, - LLM_NORM_RMS, il); - cb(cur, "ffn_norm", il); - - ggml_tensor * moe_out = build_moe_ffn(cur, - model.layers[il].ffn_gate_inp, - model.layers[il].ffn_up_exps, - model.layers[il].ffn_gate_exps, - model.layers[il].ffn_down_exps, - nullptr, - n_expert, n_expert_used, - LLM_FFN_SILU, true, - false, 0.0, - LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, - il); - cb(moe_out, "ffn_moe_out", il); - - // For Granite MoE Shared - if (hparams.n_ff_shexp > 0) { - ggml_tensor * ffn_shexp = build_ffn(cur, - model.layers[il].ffn_up_shexp, NULL, NULL, - model.layers[il].ffn_gate_shexp, NULL, NULL, - model.layers[il].ffn_down_shexp, NULL, NULL, - NULL, - LLM_FFN_SILU, LLM_FFN_PAR, il); - cb(ffn_shexp, "ffn_shexp", il); - - cur = ggml_add(ctx0, moe_out, ffn_shexp); - cb(cur, "ffn_out", il); - } else { - cur = moe_out; - } - } - - // For Granite architectures - scale residual - cur = ggml_scale(ctx0, cur, hparams.f_residual_scale); - cur = ggml_add(ctx0, cur, ffn_inp); - cb(cur, "ffn_out", il); - - cur = build_cvec(cur, il); - cb(cur, "l_out", il); + // ffn + 
cur = build_layer_ffn(cur, inpSA, model, il); // input for next layer inpL = cur; @@ -12541,42 +14303,408 @@ struct llm_build_granite : public llm_graph_context { ggml_build_forward_expand(gf, cur); } -}; -// ref: https://github.com/facebookresearch/chameleon -// based on the original build_llama() function, changes: -// * qk-norm -// * swin-norm -// * removed bias -// * removed MoE -struct llm_build_chameleon : public llm_graph_context { - llm_build_chameleon(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { - const int64_t n_embd_head = hparams.n_embd_head_v; + ggml_tensor * build_attention_layer( + ggml_cgraph * gf, + ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv_unified * inp_attn, + const llama_model & model, + const int64_t n_embd_head, + const int il) { + + // compute Q and K and (optionally) RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } - GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); - GGML_ASSERT(n_embd_head == hparams.n_rot); + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } - ggml_tensor * cur; - ggml_tensor * inpL; + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } - inpL = build_inp_embd(model.tok_embd); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, hparams.n_head(il), n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, hparams.n_head_kv(il), n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, hparams.n_head_kv(il), n_tokens); - // inp_pos - contains the positions - ggml_tensor * inp_pos = 
build_inp_pos(); + const bool use_rope = hparams.rope_finetuned; + if (use_rope) { + ggml_tensor * rope_factors = model.get_rope_factors(cparams, il); + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); - auto * inp_attn = build_attn_inp_kv_unified(); + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + } - for (int il = 0; il < n_layer; ++il) { - ggml_tensor * inpSA = inpL; + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); - // norm - if (hparams.swin_norm) { - cur = inpL; - } else { - cur = build_norm(inpL, - model.layers[il].attn_norm, NULL, - LLM_NORM_RMS, il); - cb(cur, "attn_norm", il); + const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); + cb(cur, "attn_out", il); + return cur; + } + + ggml_tensor * build_layer_ffn( + ggml_tensor * cur, + ggml_tensor * inpSA, + const llama_model & model, + const int il) { + + // For Granite architectures - scale residual + if (hparams.f_residual_scale) { + cur = ggml_scale(ctx0, cur, hparams.f_residual_scale); + } + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network (non-MoE) + if (model.layers[il].ffn_gate_inp == nullptr) { + + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL, + model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL, + model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + + } 
else { + // MoE branch + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + ggml_tensor * moe_out = build_moe_ffn(cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + nullptr, + n_expert, n_expert_used, + LLM_FFN_SILU, true, + false, 0.0, + LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, + il); + cb(moe_out, "ffn_moe_out", il); + + // For Granite MoE Shared + if (hparams.n_ff_shexp > 0) { + ggml_tensor * ffn_shexp = build_ffn(cur, + model.layers[il].ffn_up_shexp, NULL, NULL, + model.layers[il].ffn_gate_shexp, NULL, NULL, + model.layers[il].ffn_down_shexp, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(ffn_shexp, "ffn_shexp", il); + + cur = ggml_add(ctx0, moe_out, ffn_shexp); + cb(cur, "ffn_out", il); + } else { + cur = moe_out; + } + } + + // For Granite architectures - scale residual + if (hparams.f_residual_scale) { + cur = ggml_scale(ctx0, cur, hparams.f_residual_scale); + } + cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "ffn_out", il); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + return cur; + } +}; + +struct llm_build_granite_hybrid : public llm_graph_context_mamba { + + llm_build_granite_hybrid( + const llama_model & model, + const llm_graph_params & params, + ggml_cgraph * gf) : + llm_graph_context_mamba(params) { + + const int64_t n_embd_head = hparams.n_embd_head_v; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + auto * inp = build_inp_mem_hybrid(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + // Positional embeddings populated if rope enabled + ggml_tensor * inp_pos = nullptr; + if (hparams.rope_finetuned) { + inp_pos = build_inp_pos(); + } + + for (int il = 0; il < n_layer; ++il) { + struct ggml_tensor * inpSA = inpL; + + // norm + cur = build_norm(inpL, + model.layers[il].attn_norm, 
NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + if (hparams.is_recurrent(il)) { + // ssm layer // + cur = build_mamba2_layer(inp->get_recr(), gf, cur, model, ubatch, il); + } else { + // attention layer // + cur = build_attention_layer( + gf, cur, inp_pos, inp->get_attn(), model, + n_embd_head, il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + // ffn + cur = build_layer_ffn(cur, inpSA, model, il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + // For Granite architectures - scale logits + if (hparams.f_logit_scale) { + cur = ggml_scale(ctx0, cur, 1.0f / hparams.f_logit_scale); + } + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } + + ggml_tensor * build_attention_layer( + ggml_cgraph * gf, + ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv_unified * inp_attn, + const llama_model & model, + const int64_t n_embd_head, + const int il) { + + // compute Q and K and (optionally) RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, hparams.n_head(il), n_tokens); + Kcur = ggml_reshape_3d(ctx0, 
Kcur, n_embd_head, hparams.n_head_kv(il), n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, hparams.n_head_kv(il), n_tokens); + + const bool use_rope = hparams.rope_finetuned; + if (use_rope) { + ggml_tensor * rope_factors = model.get_rope_factors(cparams, il); + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + } + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); + cb(cur, "attn_out", il); + return cur; + } + + ggml_tensor * build_layer_ffn( + ggml_tensor * cur, + ggml_tensor * inpSA, + const llama_model & model, + const int il) { + + // For Granite architectures - scale residual + if (hparams.f_residual_scale) { + cur = ggml_scale(ctx0, cur, hparams.f_residual_scale); + } + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network (non-MoE) + if (model.layers[il].ffn_gate_inp == nullptr) { + + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL, + model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL, + model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + + } else { + // MoE branch + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + ggml_tensor * moe_out = 
build_moe_ffn(cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + nullptr, + n_expert, n_expert_used, + LLM_FFN_SILU, true, + false, 0.0, + LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, + il); + cb(moe_out, "ffn_moe_out", il); + + // For Granite MoE Shared + if (hparams.n_ff_shexp > 0) { + ggml_tensor * ffn_shexp = build_ffn(cur, + model.layers[il].ffn_up_shexp, NULL, NULL, + model.layers[il].ffn_gate_shexp, NULL, NULL, + model.layers[il].ffn_down_shexp, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(ffn_shexp, "ffn_shexp", il); + + cur = ggml_add(ctx0, moe_out, ffn_shexp); + cb(cur, "ffn_out", il); + } else { + cur = moe_out; + } + } + + // For Granite architectures - scale residual + if (hparams.f_residual_scale) { + cur = ggml_scale(ctx0, cur, hparams.f_residual_scale); + } + cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "ffn_out", il); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + return cur; + } +}; + +// ref: https://github.com/facebookresearch/chameleon +// based on the original build_llama() function, changes: +// * qk-norm +// * swin-norm +// * removed bias +// * removed MoE +struct llm_build_chameleon : public llm_graph_context { + llm_build_chameleon(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + // norm + if (hparams.swin_norm) { + cur = inpL; + } else { + cur = build_norm(inpL, + 
model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); } // self-attention @@ -12642,21 +14770,19 @@ struct llm_build_chameleon : public llm_graph_context { cur = build_attn(inp_attn, gf, model.layers[il].wo, nullptr, Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); - - if (hparams.swin_norm) { - cur = build_norm(cur, - model.layers[il].attn_norm, NULL, - LLM_NORM_RMS, il); - } } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } + if (hparams.swin_norm) { + cur = build_norm(cur, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + } + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); cb(ffn_inp, "ffn_inp", il); @@ -12787,109 +14913,1448 @@ struct llm_build_wavtokenizer_dec : public llm_graph_context { k = ggml_conv_1d_ph(ctx0, layer.attn_k, cur, 1, 1); v = ggml_conv_1d_ph(ctx0, layer.attn_v, cur, 1, 1); - q = ggml_add(ctx0, q, layer.attn_q_b); - k = ggml_add(ctx0, k, layer.attn_k_b); - v = ggml_add(ctx0, v, layer.attn_v_b); + q = ggml_add(ctx0, q, layer.attn_q_b); + k = ggml_add(ctx0, k, layer.attn_k_b); + v = ggml_add(ctx0, v, layer.attn_v_b); + + q = ggml_cont(ctx0, ggml_transpose(ctx0, q)); + k = ggml_cont(ctx0, ggml_transpose(ctx0, k)); + + ggml_tensor * kq = ggml_mul_mat(ctx0, k, q); + + kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(hparams.posnet.n_embd)), 0.0f); + + cur = ggml_mul_mat(ctx0, kq, v); + + cur = ggml_conv_1d_ph(ctx0, layer.attn_o, cur, 1, 1); + cur = ggml_add(ctx0, cur, layer.attn_o_b); + + cur = ggml_add(ctx0, cur, inpL); + } break; + case 5: + { + cur = build_norm(cur, + layer.norm, + layer.norm_b, + LLM_NORM_GROUP, 0); + } break; + default: GGML_ABORT("unknown posnet layer"); + }; + } + + cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + + cur = build_norm(cur, 
+ model.tok_norm, + model.tok_norm_b, + LLM_NORM, -1); + + cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + + inpL = cur; + + // convnext + for (uint32_t il = 0; il < hparams.convnext.n_layer; ++il) { + const auto & layer = model.layers[il].convnext; + + cur = inpL; + + cur = ggml_conv_1d_dw_ph(ctx0, layer.dw, cur, 1, 1); + cur = ggml_add(ctx0, cur, layer.dw_b); + + cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + + cur = build_norm(cur, + layer.norm, + layer.norm_b, + LLM_NORM, -1); + + cur = build_ffn(cur, + layer.pw1, layer.pw1_b, NULL, + NULL, NULL, NULL, + layer.pw2, layer.pw2_b, NULL, + NULL, + LLM_FFN_GELU, LLM_FFN_SEQ, il); + + cur = ggml_mul(ctx0, cur, layer.gamma); + + cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + + inpL = ggml_add(ctx0, cur, inpL); + } + + cur = inpL; + + cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + + cur = build_norm(cur, + model.output_norm, + model.output_norm_b, + LLM_NORM, -1); + + // lm_head + cur = build_lora_mm(model.output, cur); + + cur = ggml_add(ctx0, cur, model.output_b); + + cb(cur, "result_embd", -1); + res->t_embd = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_plm : public llm_graph_context { + llm_build_plm(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const float kq_scale = 1.0f/sqrtf(float(hparams.n_embd_head_k)); + + const uint32_t n_embd_head_qk_rope = hparams.n_rot; + const uint32_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot; + const uint32_t kv_lora_rank = hparams.n_lora_kv; + + ggml_tensor * cur; + ggml_tensor * inpL; + + // {n_embd, n_tokens} + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + // norm + cur = build_norm(inpL, + 
model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + // self_attention + { + ggml_tensor * q = NULL; + q = ggml_mul_mat(ctx0, model.layers[il].wq, cur); + cb(q, "q", il); + + // split into {n_head * n_embd_head_qk_nope, n_tokens} + ggml_tensor * q_nope = ggml_view_3d(ctx0, q, n_embd_head_qk_nope, n_head, n_tokens, + ggml_row_size(q->type, hparams.n_embd_head_k), + ggml_row_size(q->type, hparams.n_embd_head_k * n_head), + 0); + cb(q_nope, "q_nope", il); + + // and {n_head * n_embd_head_qk_rope, n_tokens} + ggml_tensor * q_pe = ggml_view_3d(ctx0, q, n_embd_head_qk_rope, n_head, n_tokens, + ggml_row_size(q->type, hparams.n_embd_head_k), + ggml_row_size(q->type, hparams.n_embd_head_k * n_head), + ggml_row_size(q->type, n_embd_head_qk_nope)); + cb(q_pe, "q_pe", il); + + // {n_embd, kv_lora_rank + n_embd_head_qk_rope} * {n_embd, n_tokens} -> {kv_lora_rank + n_embd_head_qk_rope, n_tokens} + ggml_tensor * kv_pe_compresseed = ggml_mul_mat(ctx0, model.layers[il].wkv_a_mqa, cur); + cb(kv_pe_compresseed, "kv_pe_compresseed", il); + + // split into {kv_lora_rank, n_tokens} + ggml_tensor * kv_compressed = ggml_view_2d(ctx0, kv_pe_compresseed, kv_lora_rank, n_tokens, + kv_pe_compresseed->nb[1], + 0); + cb(kv_compressed, "kv_compressed", il); + + // and {n_embd_head_qk_rope, n_tokens} + ggml_tensor * k_pe = ggml_view_3d(ctx0, kv_pe_compresseed, n_embd_head_qk_rope, 1, n_tokens, + kv_pe_compresseed->nb[1], + kv_pe_compresseed->nb[1], + ggml_row_size(kv_pe_compresseed->type, kv_lora_rank)); + cb(k_pe, "k_pe", il); + + kv_compressed = build_norm(kv_compressed, + model.layers[il].attn_kv_a_norm, NULL, + LLM_NORM_RMS, il); + cb(kv_compressed, "kv_compressed", il); + + // {kv_lora_rank, n_head * (n_embd_head_qk_nope + n_embd_head_v)} * {kv_lora_rank, n_tokens} -> {n_head * (n_embd_head_qk_nope + n_embd_head_v), n_tokens} + ggml_tensor * kv = ggml_mul_mat(ctx0, model.layers[il].wkv_b, kv_compressed); + cb(kv, "kv", il); + + // split into {n_head * 
n_embd_head_qk_nope, n_tokens} + ggml_tensor * k_nope = ggml_view_3d(ctx0, kv, n_embd_head_qk_nope, n_head, n_tokens, + ggml_row_size(kv->type, n_embd_head_qk_nope + hparams.n_embd_head_v), + ggml_row_size(kv->type, n_head * (n_embd_head_qk_nope + hparams.n_embd_head_v)), + 0); + cb(k_nope, "k_nope", il); + + // and {n_head * n_embd_head_v, n_tokens} + ggml_tensor * v_states = ggml_view_3d(ctx0, kv, hparams.n_embd_head_v, n_head, n_tokens, + ggml_row_size(kv->type, (n_embd_head_qk_nope + hparams.n_embd_head_v)), + ggml_row_size(kv->type, (n_embd_head_qk_nope + hparams.n_embd_head_v)*n_head), + ggml_row_size(kv->type, (n_embd_head_qk_nope))); + cb(v_states, "v_states", il); + + v_states = ggml_cont(ctx0, v_states); + cb(v_states, "v_states", il); + + v_states = ggml_view_2d(ctx0, v_states, hparams.n_embd_head_v * n_head, n_tokens, + ggml_row_size(kv->type, hparams.n_embd_head_v * n_head), + 0); + cb(v_states, "v_states", il); + + q_pe = ggml_rope_ext( + ctx0, q_pe, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(q_pe, "q_pe", il); + + // shared RoPE key + k_pe = ggml_rope_ext( + ctx0, k_pe, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + cb(k_pe, "k_pe", il); + + ggml_tensor * q_states = ggml_concat(ctx0, q_nope, q_pe, 0); + cb(q_states, "q_states", il); + + ggml_tensor * k_states = ggml_concat(ctx0, k_nope, ggml_repeat(ctx0, k_pe, q_pe), 0); + cb(k_states, "k_states", il); + + cur = build_attn(inp_attn, gf, + model.layers[il].wo, NULL, + q_states, k_states, v_states, nullptr, nullptr, kq_scale, il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + 
LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + NULL, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_RELU_SQR, LLM_FFN_SEQ, il); + cb(cur, "ffn_out", il); + + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_bailingmoe : public llm_graph_context { + llm_build_bailingmoe(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + // norm + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + // self-attention + { + // rope freq factors for llama3; may return nullptr for llama2 and other models + ggml_tensor * rope_factors = model.get_rope_factors(cparams, il); + + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + ggml_tensor * Vcur = 
build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_rot, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_rot, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_rot, n_head_kv, n_tokens); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_rot)), il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + ggml_tensor * moe_out = + build_moe_ffn(cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + nullptr, + n_expert, n_expert_used, + LLM_FFN_SILU, hparams.expert_weights_norm, + false, hparams.expert_weights_scale, + LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, + il); + cb(moe_out, "ffn_moe_out", il); + + // FFN shared expert + { + ggml_tensor * ffn_shexp = build_ffn(cur, + model.layers[il].ffn_up_shexp, NULL, NULL, + model.layers[il].ffn_gate_shexp, NULL, NULL, + model.layers[il].ffn_down_shexp, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(ffn_shexp, "ffn_shexp", il); + + cur = ggml_add(ctx0, moe_out, 
ffn_shexp); + cb(cur, "ffn_out", il); + } + + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_dots1 : public llm_graph_context { + llm_build_dots1(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + // norm + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + // self_attention + { + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, il); + cb(Qcur, "Qcur_normed", il); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, 
nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = build_norm(Kcur, model.layers[il].attn_k_norm, NULL, LLM_NORM_RMS, il); + cb(Kcur, "Kcur_normed", il); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // MoE branch + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + if ((uint32_t) il < hparams.n_layer_dense_lead) { + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + } else { + ggml_tensor * moe_out = + build_moe_ffn(cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + model.layers[il].ffn_exp_probs_b, + n_expert, n_expert_used, + LLM_FFN_SILU, hparams.expert_weights_norm, + true, hparams.expert_weights_scale, + (llama_expert_gating_func_type) hparams.expert_gating_func, + il); + cb(moe_out, "ffn_moe_out", il); + + { + ggml_tensor * ffn_shexp = build_ffn(cur, + model.layers[il].ffn_up_shexp, NULL, NULL, + model.layers[il].ffn_gate_shexp, NULL, NULL, + model.layers[il].ffn_down_shexp, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(ffn_shexp, "ffn_shexp", il); + + cur = ggml_add(ctx0, moe_out, 
ffn_shexp); + cb(cur, "ffn_out", il); + } + } + + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_ernie4_5 : public llm_graph_context { + llm_build_ernie4_5(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + // norm + { + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + } + + // self-attention + { + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, 
n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = build_attn(inp_attn, gf, + model.layers[il].wo, NULL, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); + } + + if (il == n_layer - 1) { + // skip computing output for unused tokens + ggml_tensor * inp_out_ids = build_inp_out_ids(); + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + { + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + } + + cur = ggml_add(ctx0, cur, ffn_inp); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_ernie4_5_moe : public llm_graph_context { + llm_build_ernie4_5_moe(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = 
hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + GGML_ASSERT(hparams.n_moe_layer_step > 0 && "Ernie 4.5 MoE requires n_moe_layer_step > 0"); + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + // norm + { + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + } + + // self-attention + { + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + cur = 
build_attn(inp_attn, gf, + model.layers[il].wo, NULL, + Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); + cb(cur, "attn_out", il); + } + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + bool is_moe_layer = static_cast<uint32_t>(il) >= hparams.n_layer_dense_lead && (il + 1) % hparams.n_moe_layer_step == 0; + + if (!is_moe_layer) { + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + } else { + // MoE branch + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + ggml_tensor * moe_out = build_moe_ffn(cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + model.layers[il].ffn_exp_probs_b, + n_expert, n_expert_used, + LLM_FFN_SILU, true, + false, 0.0, + LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, + il); + cb(moe_out, "ffn_moe_out", il); + + // Shared expert (if present) + if (hparams.n_ff_shexp > 0) { + ggml_tensor * ffn_shexp = build_ffn(cur, + model.layers[il].ffn_up_shexp, NULL, NULL, + model.layers[il].ffn_gate_shexp, NULL, NULL, + model.layers[il].ffn_down_shexp, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(ffn_shexp, "ffn_shexp", il); + + cur = ggml_add(ctx0, moe_out, ffn_shexp); + } else { + cur = moe_out; + } + cb(cur, "ffn_out", il); + } + + cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "ffn_out", il); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; +
+ cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_falcon_h1 : public llm_graph_context_mamba { + llm_build_falcon_h1(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context_mamba(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + ggml_tensor * cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + // Build the inputs in the recurrent & kv cache + auto * inp = build_inp_mem_hybrid(); + + const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; + + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); + + // self-attention + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, hparams.rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, hparams.rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, 
beta_fast, beta_slow + ); + + cb(Qcur, "Qcur-post-rope", il); + cb(Kcur, "Kcur-post-rope", il); + cb(Vcur, "Vcur-post-rope", il); + + ggml_tensor * attn_out = build_attn(inp->get_attn(), gf, + model.layers[il].wo, NULL, + Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); + cb(attn_out, "attn_out", il); + + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + // Mamba2 layer + cb(cur, "ssm_in", il); + + ggml_tensor * ssm_out = build_mamba2_layer(inp->get_recr(), gf, cur, model, ubatch, il); + cb(ssm_out, "ssm_out", il); + + // // Aggregation + cur = ggml_add(ctx0, attn_out, ssm_out); + inpSA = ggml_add(ctx0, cur, inpSA); + cb(cur, "layer_out", il); + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } + + ggml_tensor * ffn_inp = inpSA; + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL, + model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL, + model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, "ffn_out", il); + + cur = ggml_add(ctx0, cur, inpSA); + + cur = build_cvec(cur, il); + cb(cur, "l_out", il); + + // input for next layer + inpL = cur; + } + + cur = inpL; + + cur = build_norm(cur, + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; + + // lm_head + cur = build_lora_mm(model.output, cur); + + cb(cur, "result_output", -1); + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } +}; + +struct llm_build_plamo2 : public llm_graph_context_mamba { + llm_build_plamo2(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context_mamba(params) { + ggml_tensor * cur; + ggml_tensor * inpL; 
+ + // {n_embd, n_tokens} + inpL = build_inp_embd(model.tok_embd); + cb(inpL, "embedding_output", -1); + + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_hybrid = build_inp_mem_hybrid(); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * residual = inpL; + + // ggml_graph_add_node(gf, model.layers[il].attn_norm); + // cb(model.layers[il].attn_norm, "attn_norm", il); + + // pre_mixer_norm + cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il); + + // check if this layer is Mamba or Attention + bool is_mamba_layer = hparams.is_recurrent(il); + + if (is_mamba_layer) { + // PLaMo-2 Mamba layer + cur = build_plamo2_mamba_layer(inp_hybrid->get_recr(), gf, cur, model, ubatch, il); + } else { + // PLaMo-2 Attention layer + cur = build_plamo2_attn_layer(inp_hybrid->get_attn(), inp_pos, gf, cur, model, il); + } + + // post_mixer_norm + cur = build_norm(cur, model.layers[il].attn_post_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "attn_post_norm", il); + + // residual connection + cur = ggml_add(ctx0, cur, residual); + cb(cur, "attn_residual", il); + residual = cur; + + // pre-ffn norm + cur = build_norm(cur, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "ffn_pre_norm", il); + + // feed-forward network + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + NULL, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SWIGLU, LLM_FFN_SEQ, il); + cb(cur, "ffn_out", il); + + // post ffn norm + cur = build_norm(cur, model.layers[il].ffn_post_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "ffn_post_norm", il); + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + residual = ggml_get_rows(ctx0, residual, inp_out_ids); + } + + // residual connection + cur = ggml_add(ctx0, cur, residual); + cb(cur, "ffn_residual", il); + + inpL = cur; + } + + cur = inpL; + + // final norm + cur = build_norm(cur, model.output_norm, NULL, 
LLM_NORM_RMS, -1); + cb(cur, "result_norm", -1); + + // lm_head + cur = build_lora_mm(model.output, cur); + cb(cur, "result_output", -1); + + // Explicitly mark as output tensor to ensure proper backend assignment + ggml_set_output(cur); + + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } + +private: + ggml_tensor * build_plamo2_attn_layer( + llm_graph_input_attn_kv_unified * inp, + ggml_tensor * inp_pos, + ggml_cgraph * gf, + ggml_tensor * cur, + const llama_model & model, + int il) { + + // self-attention + { + // PLaMo-2 uses combined QKV tensor + ggml_tensor * qkv = build_lora_mm(model.layers[il].wqkv, cur); + cb(qkv, "qkv", il); + + // split QKV tensor into Q, K, V + const int64_t n_embd_head_q = hparams.n_embd_head_k; + const int64_t n_embd_head_k = hparams.n_embd_head_k; + const int64_t n_embd_head_v = hparams.n_embd_head_v; + int32_t n_head_kv = hparams.n_head_kv(il); + + const int64_t q_offset = 0; + const int64_t k_offset = n_embd_head_q * n_head; + const int64_t v_offset = k_offset + n_embd_head_k * n_head_kv; + + ggml_tensor * Qcur = ggml_view_3d(ctx0, qkv, n_embd_head_q, n_head, n_tokens, n_embd_head_q * sizeof(float), qkv->nb[1], q_offset * ggml_element_size(qkv)); + ggml_tensor * Kcur = ggml_view_3d(ctx0, qkv, n_embd_head_k, n_head_kv, n_tokens, n_embd_head_k * sizeof(float), qkv->nb[1], k_offset * ggml_element_size(qkv)); + ggml_tensor * Vcur = ggml_cont(ctx0, ggml_view_2d(ctx0, qkv, n_embd_head_v * n_head_kv, n_tokens, qkv->nb[1], v_offset * ggml_element_size(qkv))); + + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head_v, n_head_kv, n_tokens); + + Qcur = build_norm(Qcur, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, il); + cb(Qcur, "Qcur_normed", il); + + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + Kcur = build_norm(Kcur, 
model.layers[il].attn_k_norm, NULL, LLM_NORM_RMS, il); + cb(Kcur, "Kcur_normed", il); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cur = build_attn(inp, gf, model.layers[il].wo, NULL, Qcur, Kcur, Vcur, NULL, NULL, 1.0f, il); + } + + cb(cur, "attn_out", il); + + return cur; + } + + ggml_tensor * build_plamo2_mamba_layer( + llm_graph_input_rs * inp, + ggml_cgraph * gf, + ggml_tensor * cur, + const llama_model & model, + const llama_ubatch & ubatch, + int il) { + + const auto * mctx_cur = inp->mctx; + + const auto kv_head = mctx_cur->get_head(); + + const int64_t d_conv = hparams.ssm_d_conv; + const int64_t d_inner = hparams.ssm_d_inner; + const int64_t d_state = hparams.ssm_d_state; + const int64_t n_heads = hparams.ssm_dt_rank; + const int64_t head_dim = d_inner / n_heads; + const int64_t n_group = hparams.ssm_n_group; + const int64_t n_seqs = ubatch.n_seqs; + + const int64_t n_seq_tokens = ubatch.n_seq_tokens; + + GGML_ASSERT(n_seqs != 0); + GGML_ASSERT(ubatch.equal_seqs()); + GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs); + + ggml_tensor * conv_states_all = mctx_cur->get_r_l(il); + ggml_tensor * ssm_states_all = mctx_cur->get_s_l(il); + + ggml_tensor * conv = build_rs(inp, gf, conv_states_all, hparams.n_embd_r(), n_seqs); + conv = ggml_reshape_3d(ctx0, conv, d_conv - 1, d_inner + 2*n_group*d_state, n_seqs); + + // {n_embd, n_tokens} => {n_embd, n_seq_tokens, n_seqs} + cur = ggml_reshape_3d(ctx0, cur, cur->ne[0], n_seq_tokens, n_seqs); + + // in_proj: {n_embd, 2*d_inner} @ {n_embd, n_seq_tokens, n_seqs} => {2*d_inner, n_seq_tokens, n_seqs} + ggml_tensor * zx = build_lora_mm(model.layers[il].ssm_in, cur); + cb(zx, "mamba_in_proj", il); + // {8192, 5, 1, 1} -> {8192, 1, 5, 1} + zx = ggml_permute(ctx0, zx, 0, 2, 1, 3); + zx = ggml_cont(ctx0, zx); + zx = ggml_reshape_4d(ctx0, zx, head_dim * 2, n_heads, n_seq_tokens, n_seqs); + cb(zx, 
"mamba_in_proj_out", il); + + // split into z and x + // => {head_dim * n_heads, n_seq_tokens, n_seqs} + ggml_tensor * x = ggml_view_4d(ctx0, zx, head_dim, n_heads, n_seq_tokens, n_seqs, zx->nb[1], zx->nb[2], zx->nb[3], head_dim*ggml_element_size(zx)); + x = ggml_cont(ctx0, x); + x = ggml_reshape_3d(ctx0, x, head_dim * n_heads, n_seq_tokens, n_seqs); + // x = ggml_permute(ctx0, x, 0, 2, 1, 3); + cb(x, "mamba_x_split", il); + + ggml_tensor * z = ggml_view_4d(ctx0, zx, head_dim, n_heads, n_seq_tokens, n_seqs, zx->nb[1], zx->nb[2], zx->nb[3], 0); + cb(z, "mamba_z_split", il); + + // conv1d + { + // => {d_conv - 1 + n_seq_tokens, d_inner, n_seqs} + ggml_tensor * conv_x = ggml_concat(ctx0, conv, ggml_transpose(ctx0, x), 0); + cb(conv_x, "mamba_conv1d_input", il); + + // copy last (d_conv - 1) columns back into the state cache + ggml_tensor * last_conv = ggml_view_3d(ctx0, conv_x, d_conv - 1, d_inner, n_seqs, + conv_x->nb[1], conv_x->nb[2], n_seq_tokens*(conv_x->nb[0])); + + ggml_build_forward_expand(gf, + ggml_cpy(ctx0, last_conv, + ggml_view_1d(ctx0, conv_states_all, + (d_conv - 1)*(d_inner)*(n_seqs), + kv_head*(d_conv - 1)*(d_inner)*ggml_element_size(conv_states_all)))); + + // 1D convolution + x = ggml_ssm_conv(ctx0, conv_x, model.layers[il].ssm_conv1d); + cb(x, "mamba_conv1d", il); + + x = ggml_silu(ctx0, x); + cb(x, "mamba_conv1d_silu", il); + } + + // SSM + { + // bcdt_proj: {d_inner, dt_rank + 2*d_state} @ {d_inner, n_seq_tokens, n_seqs} => {dt_rank + 2*d_state, n_seq_tokens, n_seqs} + ggml_tensor * x_bcdt = build_lora_mm(model.layers[il].ssm_x, x); + cb(x_bcdt, "mamba_bcdt_proj", il); + + // split into dt, B, C + const int64_t dt_dim = std::max(64, int(hparams.n_embd / 16)); + ggml_tensor * B = ggml_view_3d(ctx0, x_bcdt, d_state, n_seq_tokens, n_seqs, x_bcdt->nb[1], x_bcdt->nb[2], 0); + ggml_tensor * C = ggml_view_3d(ctx0, x_bcdt, d_state, n_seq_tokens, n_seqs, x_bcdt->nb[1], x_bcdt->nb[2], ggml_element_size(x_bcdt)*d_state); + ggml_tensor * dt = 
ggml_view_3d(ctx0, x_bcdt, dt_dim, n_seq_tokens, n_seqs, x_bcdt->nb[1], x_bcdt->nb[2], ggml_element_size(x_bcdt)*(2*d_state)); + cb(B, "mamba_B_raw", il); + cb(C, "mamba_C_raw", il); + cb(dt, "mamba_dt_raw", il); + + // Apply RMS norm to dt, B, C (PLaMo-2 specific) + B = build_norm(B, model.layers[il].ssm_b_norm, NULL, LLM_NORM_RMS, il); + C = build_norm(C, model.layers[il].ssm_c_norm, NULL, LLM_NORM_RMS, il); + dt = build_norm(dt, model.layers[il].ssm_dt_norm, NULL, LLM_NORM_RMS, il); + cb(B, "mamba_B_normed", il); + cb(C, "mamba_C_normed", il); + cb(dt, "mamba_dt_normed", il); + + // dt_proj: {dt_rank, d_inner} @ {dt_rank, n_seq_tokens, n_seqs} => {d_inner, n_seq_tokens, n_seqs} + dt = build_lora_mm(model.layers[il].ssm_dt, dt); + dt = ggml_add(ctx0, dt, model.layers[il].ssm_dt_b); + cb(dt, "mamba_dt_proj", il); + + ggml_tensor * A = ggml_reshape_2d(ctx0, model.layers[il].ssm_a, 1, n_heads); + cb(A, "mamba_A", il); + + x = ggml_view_4d(ctx0, x, head_dim, n_heads, n_seq_tokens, n_seqs, head_dim * ggml_element_size(x), head_dim * n_heads * ggml_element_size(x), head_dim * n_heads * n_seq_tokens * ggml_element_size(x), 0); + B = ggml_view_4d(ctx0, B, d_state, 1, n_seq_tokens, n_seqs, d_state * B->nb[0], B->nb[1], B->nb[2], 0); + C = ggml_view_4d(ctx0, C, d_state, 1, n_seq_tokens, n_seqs, d_state * C->nb[0], C->nb[1], C->nb[2], 0); + + // use the states and the indices provided by build_recurrent_state + // (this is necessary in order to properly use the states before they are overwritten, + // while avoiding to make unnecessary copies of the states) + auto get_ssm_rows = [&](ggml_context * ctx, ggml_tensor * states, ggml_tensor * ids) { + ggml_tensor * ssm = ggml_reshape_4d(ctx, states, d_state, head_dim, n_heads, mctx_cur->get_size()); + + // Custom operator to optimize the parallel associative scan + // as described in the Annex D of the Mamba paper. 
+ // => {d_inner, n_seq_tokens, n_seqs} and {d_state, d_inner, n_seqs} + return ggml_ssm_scan(ctx, ssm, x, dt, A, B, C, ids); + }; + + ggml_tensor * y_ssm = build_rs(inp, gf, ssm_states_all, hparams.n_embd_s(), ubatch.n_seqs, get_ssm_rows); + cb(y_ssm, "mamba_ssm_scan", il); + + // store last states + ggml_build_forward_expand(gf, + ggml_cpy(ctx0, + ggml_view_1d(ctx0, y_ssm, d_state*d_inner*n_seqs, x->nb[3]*x->ne[3]), + ggml_view_1d(ctx0, ssm_states_all, d_state*d_inner*n_seqs, + kv_head*d_state*d_inner*ggml_element_size(ssm_states_all)))); + + ggml_tensor * y = ggml_view_4d(ctx0, y_ssm, head_dim, n_heads, n_seq_tokens, n_seqs, head_dim * ggml_element_size(x), head_dim * n_heads * ggml_element_size(x), head_dim * n_heads * n_seq_tokens * ggml_element_size(x), 0); + cb(y, "mamba_y_view", il); + + // Add D parameter and apply gating with z + // {d_inner, n_seq_tokens, n_seqs} * {d_inner} => {d_inner, n_seq_tokens, n_seqs} + ggml_tensor * D = ggml_reshape_2d(ctx0, model.layers[il].ssm_d, 1, n_heads); + y = ggml_add(ctx0, y, ggml_mul(ctx0, x, D)); + cb(y, "mamba_y_add_d", il); + + y = ggml_swiglu_split(ctx0, ggml_cont(ctx0, z), y); + cb(y, "mamba_y_swiglu_z", il); + + // out_proj: {d_inner, n_embd} @ {d_inner, n_seq_tokens, n_seqs} => {n_embd, n_seq_tokens, n_seqs} + y = ggml_view_3d(ctx0, y, head_dim * n_heads, n_seq_tokens, n_seqs, y->nb[2], y->nb[3], 0); + cur = build_lora_mm(model.layers[il].ssm_out, y); + cb(cur, "mamba_out_proj", il); + } + + // {n_embd, n_seq_tokens, n_seqs} => {n_embd, n_tokens} + cur = ggml_reshape_2d(ctx0, cur, cur->ne[0], n_seq_tokens * n_seqs); + cb(cur, "mamba_out", il); + + return cur; + } +}; + +struct llm_build_arcee : public llm_graph_context { + llm_build_arcee(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + + ggml_tensor * 
cur; + ggml_tensor * inpL; + + inpL = build_inp_embd(model.tok_embd); + + // inp_pos - contains the positions + ggml_tensor * inp_pos = build_inp_pos(); + + auto * inp_attn = build_attn_inp_kv_unified(); - q = ggml_cont(ctx0, ggml_transpose(ctx0, q)); - k = ggml_cont(ctx0, ggml_transpose(ctx0, k)); + const float kq_scale = hparams.f_attention_scale == 0.0f ? 1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; - ggml_tensor * kq = ggml_mul_mat(ctx0, k, q); + ggml_tensor * inp_out_ids = build_inp_out_ids(); - kq = ggml_soft_max_ext(ctx0, kq, nullptr, 1.0f/sqrtf(float(hparams.posnet.n_embd)), 0.0f); + for (int il = 0; il < n_layer; ++il) { + ggml_tensor * inpSA = inpL; - cur = ggml_mul_mat(ctx0, kq, v); + // norm + cur = build_norm(inpL, + model.layers[il].attn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "attn_norm", il); - cur = ggml_conv_1d_ph(ctx0, layer.attn_o, cur, 1, 1); - cur = ggml_add(ctx0, cur, layer.attn_o_b); + // self-attention + { + // rope freq factors for llama3; may return nullptr for llama2 and other models + ggml_tensor * rope_factors = model.get_rope_factors(cparams, il); - cur = ggml_add(ctx0, cur, inpL); - } break; - case 5: - { - cur = build_norm(cur, - layer.norm, - layer.norm_b, - LLM_NORM_GROUP, 0); - } break; - default: GGML_ABORT("unknown posnet layer"); - }; - } + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } - cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } - cur = build_norm(cur, - model.tok_norm, - model.tok_norm_b, - LLM_NORM, -1); + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur 
= ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } - cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); - inpL = cur; + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); - // convnext - for (uint32_t il = 0; il < hparams.convnext.n_layer; ++il) { - const auto & layer = model.layers[il].convnext; + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, rope_factors, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); - cur = inpL; + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); - cur = ggml_conv_1d_dw_ph(ctx0, layer.dw, cur, 1, 1); - cur = ggml_add(ctx0, cur, layer.dw_b); + cur = build_attn(inp_attn, gf, + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); + cb(cur, "attn_out", il); + } - cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); + } - cur = build_norm(cur, - layer.norm, - layer.norm_b, - LLM_NORM, -1); + ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); + cb(ffn_inp, "ffn_inp", il); + + // feed-forward network + // ARCEE uses relu^2 instead of silu + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); cur = build_ffn(cur, - layer.pw1, layer.pw1_b, NULL, - NULL, NULL, NULL, - layer.pw2, layer.pw2_b, NULL, + model.layers[il].ffn_up, NULL, NULL, + NULL, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, NULL, - LLM_FFN_GELU, LLM_FFN_SEQ, il); + LLM_FFN_RELU_SQR, LLM_FFN_SEQ, il); + cb(cur, "ffn_out", il); - 
cur = ggml_mul(ctx0, cur, layer.gamma); + cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "ffn_out", il); - cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); + cur = build_cvec(cur, il); + cb(cur, "l_out", il); - inpL = ggml_add(ctx0, cur, inpL); + // input for next layer + inpL = cur; } cur = inpL; - cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur)); - cur = build_norm(cur, - model.output_norm, - model.output_norm_b, - LLM_NORM, -1); + model.output_norm, NULL, + LLM_NORM_RMS, -1); + + cb(cur, "result_norm", -1); + res->t_embd = cur; // lm_head cur = build_lora_mm(model.output, cur); - cur = ggml_add(ctx0, cur, model.output_b); - - cb(cur, "result_embd", -1); - res->t_embd = cur; + cb(cur, "result_output", -1); + res->t_logits = cur; ggml_build_forward_expand(gf, cur); } }; -struct llm_build_plm : public llm_graph_context { - llm_build_plm(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { - const float kq_scale = 1.0f/sqrtf(float(hparams.n_embd_head_k)); +struct llm_build_hunyuan_moe : public llm_graph_context { + llm_build_hunyuan_moe(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; - const uint32_t n_embd_head_qk_rope = hparams.n_rot; - const uint32_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot; - const uint32_t kv_lora_rank = hparams.n_lora_kv; + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); ggml_tensor * cur; ggml_tensor * inpL; - // {n_embd, n_tokens} inpL = build_inp_embd(model.tok_embd); // inp_pos - contains the positions @@ -12897,6 +16362,10 @@ struct llm_build_plm : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + const float kq_scale = 1.0f / sqrtf(float(n_embd_head)); + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; @@ 
-12906,103 +16375,70 @@ struct llm_build_plm : public llm_graph_context { LLM_NORM_RMS, il); cb(cur, "attn_norm", il); - // self_attention + // self-attention { - ggml_tensor * q = NULL; - q = ggml_mul_mat(ctx0, model.layers[il].wq, cur); - cb(q, "q", il); - - // split into {n_head * n_embd_head_qk_nope, n_tokens} - ggml_tensor * q_nope = ggml_view_3d(ctx0, q, n_embd_head_qk_nope, n_head, n_tokens, - ggml_row_size(q->type, hparams.n_embd_head_k), - ggml_row_size(q->type, hparams.n_embd_head_k * n_head), - 0); - cb(q_nope, "q_nope", il); - - // and {n_head * n_embd_head_qk_rope, n_tokens} - ggml_tensor * q_pe = ggml_view_3d(ctx0, q, n_embd_head_qk_rope, n_head, n_tokens, - ggml_row_size(q->type, hparams.n_embd_head_k), - ggml_row_size(q->type, hparams.n_embd_head_k * n_head), - ggml_row_size(q->type, n_embd_head_qk_nope)); - cb(q_pe, "q_pe", il); - - // {n_embd, kv_lora_rank + n_embd_head_qk_rope} * {n_embd, n_tokens} -> {kv_lora_rank + n_embd_head_qk_rope, n_tokens} - ggml_tensor * kv_pe_compresseed = ggml_mul_mat(ctx0, model.layers[il].wkv_a_mqa, cur); - cb(kv_pe_compresseed, "kv_pe_compresseed", il); - - // split into {kv_lora_rank, n_tokens} - ggml_tensor * kv_compressed = ggml_view_2d(ctx0, kv_pe_compresseed, kv_lora_rank, n_tokens, - kv_pe_compresseed->nb[1], - 0); - cb(kv_compressed, "kv_compressed", il); - - // and {n_embd_head_qk_rope, n_tokens} - ggml_tensor * k_pe = ggml_view_3d(ctx0, kv_pe_compresseed, n_embd_head_qk_rope, 1, n_tokens, - kv_pe_compresseed->nb[1], - kv_pe_compresseed->nb[1], - ggml_row_size(kv_pe_compresseed->type, kv_lora_rank)); - cb(k_pe, "k_pe", il); - - kv_compressed = build_norm(kv_compressed, - model.layers[il].attn_kv_a_norm, NULL, - LLM_NORM_RMS, il); - cb(kv_compressed, "kv_compressed", il); - - // {kv_lora_rank, n_head * (n_embd_head_qk_nope + n_embd_head_v)} * {kv_lora_rank, n_tokens} -> {n_head * (n_embd_head_qk_nope + n_embd_head_v), n_tokens} - ggml_tensor * kv = ggml_mul_mat(ctx0, model.layers[il].wkv_b, kv_compressed); - 
cb(kv, "kv", il); + // rope freq factors for llama3; may return nullptr for llama2 and other models + ggml_tensor * rope_factors = model.get_rope_factors(cparams, il); - // split into {n_head * n_embd_head_qk_nope, n_tokens} - ggml_tensor * k_nope = ggml_view_3d(ctx0, kv, n_embd_head_qk_nope, n_head, n_tokens, - ggml_row_size(kv->type, n_embd_head_qk_nope + hparams.n_embd_head_v), - ggml_row_size(kv->type, n_head * (n_embd_head_qk_nope + hparams.n_embd_head_v)), - 0); - cb(k_nope, "k_nope", il); + // compute Q and K and RoPE them + ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); + cb(Qcur, "Qcur", il); + if (model.layers[il].bq) { + Qcur = ggml_add(ctx0, Qcur, model.layers[il].bq); + cb(Qcur, "Qcur", il); + } - // and {n_head * n_embd_head_v, n_tokens} - ggml_tensor * v_states = ggml_view_3d(ctx0, kv, hparams.n_embd_head_v, n_head, n_tokens, - ggml_row_size(kv->type, (n_embd_head_qk_nope + hparams.n_embd_head_v)), - ggml_row_size(kv->type, (n_embd_head_qk_nope + hparams.n_embd_head_v)*n_head), - ggml_row_size(kv->type, (n_embd_head_qk_nope))); - cb(v_states, "v_states", il); + ggml_tensor * Kcur = build_lora_mm(model.layers[il].wk, cur); + cb(Kcur, "Kcur", il); + if (model.layers[il].bk) { + Kcur = ggml_add(ctx0, Kcur, model.layers[il].bk); + cb(Kcur, "Kcur", il); + } - v_states = ggml_cont(ctx0, v_states); - cb(v_states, "v_states", il); + ggml_tensor * Vcur = build_lora_mm(model.layers[il].wv, cur); + cb(Vcur, "Vcur", il); + if (model.layers[il].bv) { + Vcur = ggml_add(ctx0, Vcur, model.layers[il].bv); + cb(Vcur, "Vcur", il); + } - v_states = ggml_view_2d(ctx0, v_states, hparams.n_embd_head_v * n_head, n_tokens, - ggml_row_size(kv->type, hparams.n_embd_head_v * n_head), - 0); - cb(v_states, "v_states", il); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); - q_pe = ggml_rope_ext( - ctx0, 
q_pe, inp_pos, nullptr, + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, rope_factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow ); - cb(q_pe, "q_pe", il); - // shared RoPE key - k_pe = ggml_rope_ext( - ctx0, k_pe, inp_pos, nullptr, + cb(Qcur, "Qcur", il); + cb(Kcur, "Kcur", il); + cb(Vcur, "Vcur", il); + + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, rope_factors, n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, ext_factor, attn_factor, beta_fast, beta_slow ); - cb(k_pe, "k_pe", il); - - ggml_tensor * q_states = ggml_concat(ctx0, q_nope, q_pe, 0); - cb(q_states, "q_states", il); - ggml_tensor * k_states = ggml_concat(ctx0, k_nope, ggml_repeat(ctx0, k_pe, q_pe), 0); - cb(k_states, "k_states", il); + Kcur = build_norm(Kcur, + model.layers[il].attn_k_norm, nullptr, + LLM_NORM_RMS, il); + cb(Kcur, "Kcur_norm", il); + + Qcur = build_norm(Qcur, + model.layers[il].attn_q_norm, nullptr, + LLM_NORM_RMS, il); + cb(Qcur, "Qcur_norm", il); cur = build_attn(inp_attn, gf, - model.layers[il].wo, NULL, - q_states, k_states, v_states, nullptr, nullptr, kq_scale, il); + model.layers[il].wo, model.layers[il].bo, + Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); + cb(cur, "attn_out", il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -13011,19 +16447,39 @@ struct llm_build_plm : public llm_graph_context { cb(ffn_inp, "ffn_inp", il); cur = build_norm(ffn_inp, - model.layers[il].ffn_norm, NULL, - LLM_NORM_RMS, il); + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); cb(cur, "ffn_norm", il); - cur = build_ffn(cur, - model.layers[il].ffn_up, NULL, NULL, - NULL, NULL, NULL, - model.layers[il].ffn_down, NULL, NULL, + // feed-forward network (non-MoE) + ggml_tensor * cur_mlp = build_ffn(cur, + 
model.layers[il].ffn_up_shexp, NULL, NULL, + model.layers[il].ffn_gate_shexp, NULL, NULL, + model.layers[il].ffn_down_shexp, NULL, NULL, NULL, - LLM_FFN_RELU_SQR, LLM_FFN_SEQ, il); - cb(cur, "ffn_out", il); + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur_mlp, "ffn_mlp", il); - cur = ggml_add(ctx0, cur, ffn_inp); + // MoE branch + ggml_tensor * cur_moe = build_moe_ffn(cur, + model.layers[il].ffn_gate_inp, + model.layers[il].ffn_up_exps, + model.layers[il].ffn_gate_exps, + model.layers[il].ffn_down_exps, + nullptr, + n_expert, n_expert_used, + LLM_FFN_SILU, + true, // norm_topk_prob + false, + 0.0, + LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, + il); + cb(cur_moe, "ffn_moe_out", il); + + ggml_tensor * ffn_out = ggml_add(ctx0, cur_moe, cur_mlp); + cb(ffn_out, "ffn_out", il); + + cur = ggml_add(ctx0, ffn_out, ffn_inp); cur = build_cvec(cur, il); cb(cur, "l_out", il); @@ -13041,8 +16497,8 @@ struct llm_build_plm : public llm_graph_context { cb(cur, "result_norm", -1); res->t_embd = cur; + // lm_head cur = build_lora_mm(model.output, cur); - cb(cur, "result_output", -1); res->t_logits = cur; @@ -13050,8 +16506,13 @@ struct llm_build_plm : public llm_graph_context { } }; -struct llm_build_bailingmoe : public llm_graph_context { - llm_build_bailingmoe(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { +struct llm_build_smollm3 : public llm_graph_context { + llm_build_smollm3(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params) { + const int64_t n_embd_head = hparams.n_embd_head_v; + + GGML_ASSERT(n_embd_head == hparams.n_embd_head_k); + GGML_ASSERT(n_embd_head == hparams.n_rot); + ggml_tensor * cur; ggml_tensor * inpL; @@ -13062,9 +16523,15 @@ struct llm_build_bailingmoe : public llm_graph_context { auto * inp_attn = build_attn_inp_kv_unified(); + const float kq_scale = hparams.f_attention_scale == 0.0f ? 
1.0f/sqrtf(float(n_embd_head)) : hparams.f_attention_scale; + + ggml_tensor * inp_out_ids = build_inp_out_ids(); + for (int il = 0; il < n_layer; ++il) { ggml_tensor * inpSA = inpL; + const bool use_rope = (il + 1) % hparams.n_no_rope_layer_step != 0; + // norm cur = build_norm(inpL, model.layers[il].attn_norm, NULL, @@ -13073,9 +16540,6 @@ struct llm_build_bailingmoe : public llm_graph_context { // self-attention { - // rope freq factors for llama3; may return nullptr for llama2 and other models - ggml_tensor * rope_factors = model.get_rope_factors(cparams, il); - // compute Q and K and RoPE them ggml_tensor * Qcur = build_lora_mm(model.layers[il].wq, cur); cb(Qcur, "Qcur", il); @@ -13098,21 +16562,23 @@ struct llm_build_bailingmoe : public llm_graph_context { cb(Vcur, "Vcur", il); } - Qcur = ggml_reshape_3d(ctx0, Qcur, n_rot, n_head, n_tokens); - Kcur = ggml_reshape_3d(ctx0, Kcur, n_rot, n_head_kv, n_tokens); - Vcur = ggml_reshape_3d(ctx0, Vcur, n_rot, n_head_kv, n_tokens); + Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head, n_tokens); + Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens); + Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens); - Qcur = ggml_rope_ext( - ctx0, Qcur, inp_pos, rope_factors, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); + if (use_rope) { + Qcur = ggml_rope_ext( + ctx0, Qcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); - Kcur = ggml_rope_ext( - ctx0, Kcur, inp_pos, rope_factors, - n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, - ext_factor, attn_factor, beta_fast, beta_slow - ); + Kcur = ggml_rope_ext( + ctx0, Kcur, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + } cb(Qcur, "Qcur", il); cb(Kcur, "Kcur", il); @@ -13120,12 +16586,11 @@ struct llm_build_bailingmoe : public 
llm_graph_context { cur = build_attn(inp_attn, gf, model.layers[il].wo, model.layers[il].bo, - Qcur, Kcur, Vcur, nullptr, nullptr, 1.0f/sqrtf(float(n_rot)), il); + Qcur, Kcur, Vcur, nullptr, nullptr, kq_scale, il); + cb(cur, "attn_out", il); } - if (il == n_layer - 1) { - // skip computing output for unused tokens - ggml_tensor * inp_out_ids = build_inp_out_ids(); + if (il == n_layer - 1 && inp_out_ids) { cur = ggml_get_rows(ctx0, cur, inp_out_ids); inpSA = ggml_get_rows(ctx0, inpSA, inp_out_ids); } @@ -13133,40 +16598,24 @@ struct llm_build_bailingmoe : public llm_graph_context { ggml_tensor * ffn_inp = ggml_add(ctx0, cur, inpSA); cb(ffn_inp, "ffn_inp", il); - cur = build_norm(ffn_inp, - model.layers[il].ffn_norm, NULL, - LLM_NORM_RMS, il); - cb(cur, "ffn_norm", il); - - ggml_tensor * moe_out = - build_moe_ffn(cur, - model.layers[il].ffn_gate_inp, - model.layers[il].ffn_up_exps, - model.layers[il].ffn_gate_exps, - model.layers[il].ffn_down_exps, - nullptr, - n_expert, n_expert_used, - LLM_FFN_SILU, hparams.expert_weights_norm, - false, hparams.expert_weights_scale, - LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX, - il); - cb(moe_out, "ffn_moe_out", il); - - // FFN shared expert + // feed-forward network { - ggml_tensor * ffn_shexp = build_ffn(cur, - model.layers[il].ffn_up_shexp, NULL, NULL, - model.layers[il].ffn_gate_shexp, NULL, NULL, - model.layers[il].ffn_down_shexp, NULL, NULL, + cur = build_norm(ffn_inp, + model.layers[il].ffn_norm, NULL, + LLM_NORM_RMS, il); + cb(cur, "ffn_norm", il); + + cur = build_ffn(cur, + model.layers[il].ffn_up, model.layers[il].ffn_up_b, NULL, + model.layers[il].ffn_gate, model.layers[il].ffn_gate_b, NULL, + model.layers[il].ffn_down, model.layers[il].ffn_down_b, NULL, NULL, LLM_FFN_SILU, LLM_FFN_PAR, il); - cb(ffn_shexp, "ffn_shexp", il); - - cur = ggml_add(ctx0, moe_out, ffn_shexp); cb(cur, "ffn_out", il); } cur = ggml_add(ctx0, cur, ffn_inp); + cb(cur, "ffn_out", il); cur = build_cvec(cur, il); cb(cur, "l_out", il); @@ -13194,69 
+16643,285 @@ struct llm_build_bailingmoe : public llm_graph_context { } }; +struct llm_build_lfm2 : public llm_graph_context { + const llama_model & model; + + llm_build_lfm2(const llama_model & model, const llm_graph_params & params, ggml_cgraph * gf) : llm_graph_context(params), model(model) { + + ggml_tensor * cur = build_inp_embd(model.tok_embd); + cb(cur, "model.embed_tokens", -1); + + ggml_tensor * inp_pos = build_inp_pos(); + auto * inp_hybrid = build_inp_mem_hybrid(); + ggml_tensor * inp_out_ids = build_inp_out_ids(); + + for (int il = 0; il < n_layer; ++il) { + auto * prev_cur = cur; + cur = build_norm(cur, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "model.layers.{}.operator_norm", il); + + cur = hparams.is_recurrent(il) ? + build_shortconv_block(gf, cur, inp_hybrid->get_recr(), il) : + build_attn_block(gf, cur, inp_pos, inp_hybrid->get_attn(), il) ; + + if (il == n_layer - 1 && inp_out_ids) { + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + prev_cur = ggml_get_rows(ctx0, prev_cur, inp_out_ids); + } + + cur = ggml_add(ctx0, prev_cur, cur); + cur = ggml_add(ctx0, cur, build_feed_forward(cur, il)); + } + + cur = build_norm(cur, model.tok_norm, NULL, LLM_NORM_RMS, -1); + cb(cur, "model.embedding_norm", -1); + res->t_embd = cur; + + // lm_head is tied with embeddings + cur = build_lora_mm(model.tok_embd, cur); + cb(cur, "lm_head", -1); + + res->t_logits = cur; + + ggml_build_forward_expand(gf, cur); + } + + ggml_tensor * build_feed_forward(ggml_tensor * cur, + int il) const { + cur = build_norm(cur, model.layers[il].ffn_norm, NULL, LLM_NORM_RMS, il); + cb(cur, "model.layers.{}.ffn_norm", il); + + GGML_ASSERT(!model.layers[il].ffn_up_b); + GGML_ASSERT(!model.layers[il].ffn_gate_b); + GGML_ASSERT(!model.layers[il].ffn_down_b); + cur = build_ffn(cur, + model.layers[il].ffn_up, NULL, NULL, + model.layers[il].ffn_gate, NULL, NULL, + model.layers[il].ffn_down, NULL, NULL, + NULL, + LLM_FFN_SILU, LLM_FFN_PAR, il); + cb(cur, 
"model.layers.{}.feed_forward.w2", il); + + return cur; + } + + ggml_tensor * build_attn_block(ggml_cgraph * gf, + ggml_tensor * cur, + ggml_tensor * inp_pos, + llm_graph_input_attn_kv_unified * inp_attn, + int il) const { + GGML_ASSERT(hparams.n_embd_v_gqa(il) == hparams.n_embd_k_gqa(il)); + auto const n_embd_head = hparams.n_embd_head_v; + auto const n_head_kv = hparams.n_head_kv(il); + + auto * q = build_lora_mm(model.layers[il].wq, cur); + cb(q, "model.layers.{}.self_attn.q_proj", il); + auto * k = build_lora_mm(model.layers[il].wk, cur); + cb(k, "model.layers.{}.self_attn.k_proj", il); + auto * v = build_lora_mm(model.layers[il].wv, cur); + cb(v, "model.layers.{}.self_attn.v_proj", il); + + q = ggml_reshape_3d(ctx0, q, n_embd_head, n_head, n_tokens); + k = ggml_reshape_3d(ctx0, k, n_embd_head, n_head_kv, n_tokens); + v = ggml_reshape_3d(ctx0, v, n_embd_head, n_head_kv, n_tokens); + + // qk norm + q = build_norm(q, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, il); + cb(q, "model.layers.{}.self_attn.q_layernorm", il); + k = build_norm(k, model.layers[il].attn_k_norm, NULL, LLM_NORM_RMS, il); + cb(k, "model.layers.{}.self_attn.k_layernorm", il); + + // RoPE + q = ggml_rope_ext( + ctx0, q, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + k = ggml_rope_ext( + ctx0, k, inp_pos, nullptr, + n_rot, rope_type, n_ctx_orig, freq_base, freq_scale, + ext_factor, attn_factor, beta_fast, beta_slow + ); + + cur = build_attn(inp_attn, gf, model.layers[il].wo, NULL, + q, k, v, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il); + + cb(cur, "model.layers.{}.self_attn.out_proj", il); + + return cur; + } + + ggml_tensor * build_shortconv_block(ggml_cgraph * gf, + ggml_tensor * cur, + llm_graph_input_rs * inp_recr, + int il) { + const auto * mctx_cur = static_cast(mctx)->get_recr(); + const uint32_t kv_head = mctx_cur->get_head(); + const int64_t n_seq_tokens = ubatch.n_seq_tokens; + const int64_t 
n_seqs = ubatch.n_seqs; + GGML_ASSERT(n_seqs != 0); + GGML_ASSERT(ubatch.equal_seqs()); + GGML_ASSERT(ubatch.n_tokens == n_seq_tokens * n_seqs); + + GGML_ASSERT(hparams.n_shortconv_l_cache > 1); + const uint32_t d_conv = hparams.n_shortconv_l_cache - 1; + + // {n_embd, n_tokens} => {n_embd, n_seq_tokens, n_seqs} + cur = ggml_reshape_3d(ctx0, cur, cur->ne[0], n_seq_tokens, n_seqs); + + auto * bcx = build_lora_mm(model.layers[il].shortconv.in_proj, cur); + cb(bcx, "model.layers.{}.conv.in_proj", il); + + constexpr auto n_chunks = 3; + GGML_ASSERT(bcx->ne[0] % n_chunks == 0); + auto const chunk_size = bcx->ne[0] / n_chunks; + auto * b = ggml_view_3d(ctx0, bcx, chunk_size, bcx->ne[1], bcx->ne[2], bcx->nb[1], bcx->nb[2], 0*chunk_size*ggml_element_size(bcx)); + auto * c = ggml_view_3d(ctx0, bcx, chunk_size, bcx->ne[1], bcx->ne[2], bcx->nb[1], bcx->nb[2], 1*chunk_size*ggml_element_size(bcx)); + auto * x = ggml_view_3d(ctx0, bcx, chunk_size, bcx->ne[1], bcx->ne[2], bcx->nb[1], bcx->nb[2], 2*chunk_size*ggml_element_size(bcx)); + + auto * bx = ggml_transpose(ctx0, ggml_mul(ctx0, b, x)); + + // read conv state + auto * conv_state = mctx_cur->get_r_l(il); + auto * conv_rs = build_rs(inp_recr, gf, conv_state, hparams.n_embd_r(), n_seqs); + auto * conv = ggml_reshape_3d(ctx0, conv_rs, d_conv, hparams.n_embd, n_seqs); + + bx = ggml_concat(ctx0, conv, bx, 0); + GGML_ASSERT(bx->ne[0] > conv->ne[0]); + + // last d_conv columns is a new conv state + auto * new_conv = ggml_view_3d(ctx0, bx, conv->ne[0], bx->ne[1], bx->ne[2], bx->nb[1], bx->nb[2], (bx->ne[0] - conv->ne[0])*ggml_element_size(bx)); + GGML_ASSERT(ggml_are_same_shape(conv, new_conv)); + + // write new conv conv state + ggml_build_forward_expand( + gf, + ggml_cpy( + ctx0, + new_conv, + ggml_view_1d( + ctx0, + conv_state, + ggml_nelements(new_conv), + kv_head*d_conv*n_embd*ggml_element_size(new_conv) + ) + ) + ); + + auto * conv_kernel = model.layers[il].shortconv.conv; + auto * conv_out = ggml_ssm_conv(ctx0, bx, 
conv_kernel); + cb(conv_out, "model.layers.{}.conv.conv", il); + + auto * y = ggml_mul(ctx0, c, conv_out); + y = build_lora_mm(model.layers[il].shortconv.out_proj, y); + cb(y, "model.layers.{}.conv.out_proj", il); + // {n_embd, n_seq_tokens, n_seqs} => {n_embd, n_tokens} + y = ggml_reshape_2d(ctx0, y, y->ne[0], n_seq_tokens * n_seqs); + + return y; + } +}; + llama_memory_i * llama_model::create_memory(const llama_memory_params & params, llama_cparams & cparams) const { llama_memory_i * res; switch (arch) { + // Models that need specific instantiation should be handled in the + // switch statement case LLM_ARCH_BERT: case LLM_ARCH_JINA_BERT_V2: case LLM_ARCH_NOMIC_BERT: case LLM_ARCH_NOMIC_BERT_MOE: + case LLM_ARCH_NEO_BERT: case LLM_ARCH_WAVTOKENIZER_DEC: + case LLM_ARCH_DREAM: { res = nullptr; } break; - case LLM_ARCH_MAMBA: - case LLM_ARCH_RWKV6: - case LLM_ARCH_RWKV6QWEN2: - case LLM_ARCH_RWKV7: - case LLM_ARCH_ARWKV7: - { - res = new llama_kv_cache_recurrent( - *this, - GGML_TYPE_F32, - GGML_TYPE_F32, - cparams.offload_kqv, - std::max((uint32_t) 1, cparams.n_seq_max), - cparams.n_seq_max); - } break; + // Models that need standard caching should rely on recurrent/hybrid + // checks default: { - const auto padding = llama_kv_cache_unified::get_padding(cparams); + if (llm_arch_is_recurrent(arch)) { + res = new llama_memory_recurrent( + *this, + nullptr, + GGML_TYPE_F32, + GGML_TYPE_F32, + cparams.offload_kqv, + std::max((uint32_t) 1, cparams.n_seq_max), + cparams.n_seq_max); + } else if (llm_arch_is_hybrid(arch)) { + const auto padding = llama_kv_cache_unified::get_padding(cparams); + + cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding); + + res = new llama_memory_hybrid( + /* model */ *this, + /* attn_type_k */ params.type_k, + /* attn_type_v */ params.type_v, + /* attn_v_trans */ !cparams.flash_attn, + /* attn_kv_size */ cparams.n_ctx, + /* attn_n_pad */ padding, + /* attn_n_swa */ hparams.n_swa, + /* attn_swa_type */ hparams.swa_type, + /* recurrent_type_k */ 
GGML_TYPE_F32, + /* recurrent_type_v */ GGML_TYPE_F32, + /* recurrent_kv_size */ std::max((uint32_t) 1, cparams.n_seq_max), + /* n_seq_max */ cparams.n_seq_max, + /* offload */ cparams.offload_kqv, + /* filter_attn */ (arch == LLM_ARCH_FALCON_H1) ? [&](int32_t) { return true; } : (llama_memory_hybrid::layer_filter_cb)nullptr, + /* filter_recr */ (arch == LLM_ARCH_FALCON_H1) ? [&](int32_t) { return true; } : (llama_memory_hybrid::layer_filter_cb)nullptr); + } else { + const auto padding = llama_kv_cache_unified::get_padding(cparams); - cparams.n_ctx = GGML_PAD(cparams.n_ctx, padding); + uint32_t n_ctx_per_stream = cparams.n_ctx; - LLAMA_LOG_DEBUG("%s: n_ctx = %u (padded)\n", __func__, cparams.n_ctx); + if (!cparams.kv_unified) { + n_ctx_per_stream = (cparams.n_ctx + cparams.n_seq_max - 1)/cparams.n_seq_max; + n_ctx_per_stream = GGML_PAD(n_ctx_per_stream, padding); - if (hparams.swa_type != LLAMA_SWA_TYPE_NONE) { - GGML_ASSERT(hparams.is_swa_any()); + cparams.n_ctx = n_ctx_per_stream*cparams.n_seq_max; + } else { + n_ctx_per_stream = GGML_PAD(n_ctx_per_stream, padding); - res = new llama_kv_cache_unified_iswa( - *this, - params.type_k, - params.type_v, - !cparams.flash_attn, - cparams.offload_kqv, - params.swa_full, - cparams.n_ctx, - cparams.n_seq_max, - cparams.n_ubatch, - padding); - } else { - GGML_ASSERT(!hparams.is_swa_any()); + cparams.n_ctx = n_ctx_per_stream; + } - res = new llama_kv_cache_unified( - *this, - nullptr, - params.type_k, - params.type_v, - !cparams.flash_attn, - cparams.offload_kqv, - cparams.n_ctx, - cparams.n_seq_max, - padding, - hparams.n_swa, - hparams.swa_type); + LLAMA_LOG_DEBUG("%s: n_ctx = %u (padded)\n", __func__, cparams.n_ctx); + + if (hparams.swa_type != LLAMA_SWA_TYPE_NONE) { + GGML_ASSERT(hparams.is_swa_any()); + + res = new llama_kv_cache_unified_iswa( + *this, + params.type_k, + params.type_v, + !cparams.flash_attn, + cparams.offload_kqv, + params.swa_full, + cparams.kv_unified, + n_ctx_per_stream, + cparams.n_seq_max, + 
cparams.n_ubatch, + padding); + } else { + GGML_ASSERT(!hparams.is_swa_any()); + + res = new llama_kv_cache_unified( + *this, + nullptr, + params.type_k, + params.type_v, + !cparams.flash_attn, + cparams.offload_kqv, + cparams.kv_unified, + n_ctx_per_stream, + cparams.n_seq_max, + padding, + hparams.n_swa, + hparams.swa_type); + } } } } @@ -13264,10 +16929,10 @@ llama_memory_i * llama_model::create_memory(const llama_memory_params & params, return res; } -llm_graph_result_ptr llama_model::build_graph( - const llm_graph_params & params, - ggml_cgraph * gf, - llm_graph_type type) const { +ggml_cgraph * llama_model::build_graph(const llm_graph_params & params) const { + // TODO: temporary - will refactor this to keep the "gf" instance in the llm_graph_context and avoid passing it everywhere + auto * gf = params.res->get_gf(); + std::unique_ptr llm; switch (arch) { @@ -13310,6 +16975,10 @@ llm_graph_result_ptr llama_model::build_graph( { llm = std::make_unique(*this, params, gf); } break; + case LLM_ARCH_NEO_BERT: + { + llm = std::make_unique(*this, params, gf); + } break; case LLM_ARCH_BLOOM: { llm = std::make_unique(*this, params, gf); @@ -13330,6 +16999,11 @@ llm_graph_result_ptr llama_model::build_graph( { llm = std::make_unique(*this, params, gf); } break; + case LLM_ARCH_DREAM: + { + llm = std::make_unique(*this, params, gf); + } + break; case LLM_ARCH_QWEN2VL: { llm = std::make_unique(*this, params, gf); @@ -13363,6 +17037,10 @@ llm_graph_result_ptr llama_model::build_graph( { llm = std::make_unique(*this, params, gf); } break; + case LLM_ARCH_PLAMO2: + { + llm = std::make_unique(*this, params, gf); + } break; case LLM_ARCH_GPT2: { llm = std::make_unique(*this, params, gf); @@ -13395,14 +17073,23 @@ llm_graph_result_ptr llama_model::build_graph( { llm = std::make_unique(*this, params, gf); } break; + case LLM_ARCH_GEMMA3N: + { + llm = std::make_unique(*this, params, gf); + } break; case LLM_ARCH_STARCODER2: { llm = std::make_unique(*this, params, gf); } break; 
case LLM_ARCH_MAMBA: + case LLM_ARCH_MAMBA2: { llm = std::make_unique(*this, params, gf); } break; + case LLM_ARCH_JAMBA: + { + llm = std::make_unique(*this, params, gf); + } break; case LLM_ARCH_XVERSE: { llm = std::make_unique(*this, params, gf); @@ -13465,7 +17152,7 @@ llm_graph_result_ptr llama_model::build_graph( } break; case LLM_ARCH_T5: { - switch (type) { + switch (params.gtype) { case LLM_GRAPH_TYPE_ENCODER: llm = std::make_unique(*this, params, gf); break; @@ -13516,6 +17203,10 @@ llm_graph_result_ptr llama_model::build_graph( { llm = std::make_unique(*this, params, gf); } break; + case LLM_ARCH_GRANITE_HYBRID: + { + llm = std::make_unique(*this, params, gf); + } break; case LLM_ARCH_CHAMELEON: { llm = std::make_unique(*this, params, gf); @@ -13532,6 +17223,38 @@ llm_graph_result_ptr llama_model::build_graph( { llm = std::make_unique(*this, params, gf); } break; + case LLM_ARCH_DOTS1: + { + llm = std::make_unique(*this, params, gf); + } break; + case LLM_ARCH_ARCEE: + { + llm = std::make_unique(*this, params, gf); + } break; + case LLM_ARCH_ERNIE4_5: + { + llm = std::make_unique(*this, params, gf); + } break; + case LLM_ARCH_ERNIE4_5_MOE: + { + llm = std::make_unique(*this, params, gf); + } break; + case LLM_ARCH_HUNYUAN_MOE: + { + llm = std::make_unique(*this, params, gf); + } break; + case LLM_ARCH_SMOLLM3: + { + llm = std::make_unique(*this, params, gf); + } break; + case LLM_ARCH_FALCON_H1: + { + llm = std::make_unique(*this, params, gf); + } break; + case LLM_ARCH_LFM2: + { + llm = std::make_unique(*this, params, gf); + } break; default: GGML_ABORT("fatal error"); } @@ -13539,7 +17262,7 @@ llm_graph_result_ptr llama_model::build_graph( // add on pooling layer llm->build_pooling(gf, cls, cls_b, cls_out, cls_out_b); - return std::move(llm->res); + return llm->res->get_gf(); } // @@ -13648,6 +17371,8 @@ llama_rope_type llama_model_rope_type(const llama_model * model) { case LLM_ARCH_REFACT: case LLM_ARCH_BLOOM: case LLM_ARCH_MAMBA: + case 
LLM_ARCH_MAMBA2: + case LLM_ARCH_JAMBA: case LLM_ARCH_JINA_BERT_V2: case LLM_ARCH_T5: case LLM_ARCH_T5ENCODER: @@ -13679,12 +17404,19 @@ llama_rope_type llama_model_rope_type(const llama_model * model) { case LLM_ARCH_GLM4: case LLM_ARCH_GRANITE: case LLM_ARCH_GRANITE_MOE: + case LLM_ARCH_GRANITE_HYBRID: case LLM_ARCH_CHAMELEON: case LLM_ARCH_BAILINGMOE: + case LLM_ARCH_NEO_BERT: + case LLM_ARCH_SMOLLM3: + case LLM_ARCH_ARCEE: + case LLM_ARCH_ERNIE4_5: + case LLM_ARCH_ERNIE4_5_MOE: return LLAMA_ROPE_TYPE_NORM; // the pairs of head values are offset by n_rot/2 case LLM_ARCH_FALCON: + case LLM_ARCH_FALCON_H1: case LLM_ARCH_GROK: case LLM_ARCH_DBRX: case LLM_ARCH_BERT: @@ -13694,6 +17426,7 @@ llama_rope_type llama_model_rope_type(const llama_model * model) { case LLM_ARCH_BITNET: case LLM_ARCH_QWEN: case LLM_ARCH_QWEN2: + case LLM_ARCH_DREAM: case LLM_ARCH_QWEN2MOE: case LLM_ARCH_QWEN3: case LLM_ARCH_QWEN3MOE: @@ -13703,9 +17436,11 @@ llama_rope_type llama_model_rope_type(const llama_model * model) { case LLM_ARCH_PHI3: case LLM_ARCH_PHIMOE: case LLM_ARCH_PLAMO: + case LLM_ARCH_PLAMO2: case LLM_ARCH_GEMMA: case LLM_ARCH_GEMMA2: case LLM_ARCH_GEMMA3: + case LLM_ARCH_GEMMA3N: case LLM_ARCH_STARCODER2: case LLM_ARCH_OPENELM: case LLM_ARCH_GPTNEOX: @@ -13714,6 +17449,9 @@ llama_rope_type llama_model_rope_type(const llama_model * model) { case LLM_ARCH_NEMOTRON: case LLM_ARCH_EXAONE: case LLM_ARCH_MINICPM3: + case LLM_ARCH_DOTS1: + case LLM_ARCH_HUNYUAN_MOE: + case LLM_ARCH_LFM2: return LLAMA_ROPE_TYPE_NEOX; case LLM_ARCH_QWEN2VL: @@ -13787,7 +17525,7 @@ const char * llama_model_chat_template(const llama_model * model, const char * n // do not extend this list unless absolutely necessary // Mistral-Small-2503 does not have built-in chat template llama_vocab_pre_type pre_type = model->vocab.get_pre_type(); - if (pre_type == LLAMA_VOCAB_PRE_TYPE_TEKKEN && model->layers.size() == 40) { + if (!name && pre_type == LLAMA_VOCAB_PRE_TYPE_TEKKEN && model->layers.size() == 40) { 
return "mistral-v7-tekken"; } @@ -13821,14 +17559,7 @@ llama_token llama_model_decoder_start_token(const llama_model * model) { } bool llama_model_is_recurrent(const llama_model * model) { - switch (model->arch) { - case LLM_ARCH_MAMBA: return true; - case LLM_ARCH_RWKV6: return true; - case LLM_ARCH_RWKV6QWEN2: return true; - case LLM_ARCH_RWKV7: return true; - case LLM_ARCH_ARWKV7: return true; - default: return false; - } + return llm_arch_is_recurrent(model->arch); } const std::vector> & llama_internal_get_tensor_map(const llama_model * model) { diff --git a/src/llama-model.h b/src/llama-model.h index 18b714620bbcf..094e23808a813 100644 --- a/src/llama-model.h +++ b/src/llama-model.h @@ -32,16 +32,21 @@ enum llm_type { LLM_TYPE_190M, LLM_TYPE_220M, LLM_TYPE_250M, + LLM_TYPE_256M, LLM_TYPE_270M, LLM_TYPE_335M, + LLM_TYPE_350M, LLM_TYPE_410M, LLM_TYPE_450M, LLM_TYPE_475M, + LLM_TYPE_700M, LLM_TYPE_770M, LLM_TYPE_780M, + LLM_TYPE_0_3B, LLM_TYPE_0_5B, LLM_TYPE_0_6B, LLM_TYPE_1B, + LLM_TYPE_1_2B, LLM_TYPE_1_3B, LLM_TYPE_1_4B, LLM_TYPE_1_5B, @@ -73,6 +78,7 @@ enum llm_type { LLM_TYPE_40B, LLM_TYPE_65B, LLM_TYPE_70B, + LLM_TYPE_142B, LLM_TYPE_236B, LLM_TYPE_290B, LLM_TYPE_314B, @@ -92,8 +98,13 @@ enum llm_type { LLM_TYPE_57B_A14B, LLM_TYPE_17B_16E, // llama4 Scout LLM_TYPE_17B_128E, // llama4 Maverick + LLM_TYPE_A13B, + LLM_TYPE_21B_A3B, // Ernie MoE small LLM_TYPE_30B_A3B, LLM_TYPE_235B_A22B, + LLM_TYPE_300B_A47B, // Ernie MoE big + LLM_TYPE_E2B, + LLM_TYPE_E4B, }; std::string llama_rope_scaling_type_name(llama_rope_scaling_type rope_scaling_type); @@ -149,6 +160,12 @@ struct llama_layer_convnext { struct ggml_tensor * gamma = nullptr; }; +struct llama_layer_shortconv { + struct ggml_tensor * in_proj = nullptr; + struct ggml_tensor * conv = nullptr; + struct ggml_tensor * out_proj = nullptr; +}; + struct llama_layer { // normalization struct ggml_tensor * attn_norm = nullptr; @@ -168,6 +185,10 @@ struct llama_layer { struct ggml_tensor * ffn_sub_norm = nullptr; 
struct ggml_tensor * attn_norm_cross = nullptr; struct ggml_tensor * attn_norm_enc = nullptr; + struct ggml_tensor * ssm_norm = nullptr; + struct ggml_tensor * ssm_dt_norm = nullptr; + struct ggml_tensor * ssm_b_norm = nullptr; + struct ggml_tensor * ssm_c_norm = nullptr; // attention struct ggml_tensor * wq = nullptr; @@ -315,9 +336,24 @@ struct llama_layer { struct ggml_tensor * ffn_up_scale = nullptr; struct ggml_tensor * ffn_down_scale = nullptr; + // altup & laurel + struct ggml_tensor * per_layer_inp_gate = nullptr; + struct ggml_tensor * per_layer_proj = nullptr; + struct ggml_tensor * per_layer_post_norm = nullptr; + struct ggml_tensor * altup_correct_coef = nullptr; + struct ggml_tensor * altup_correct_scale = nullptr; + struct ggml_tensor * altup_predict_coef = nullptr; + struct ggml_tensor * altup_router = nullptr; + struct ggml_tensor * altup_router_norm = nullptr; + struct ggml_tensor * laurel_l = nullptr; + struct ggml_tensor * laurel_r = nullptr; + struct ggml_tensor * laurel_post_norm = nullptr; + struct llama_layer_posnet posnet; struct llama_layer_convnext convnext; + + struct llama_layer_shortconv shortconv; }; struct llama_model { @@ -353,6 +389,13 @@ struct llama_model { struct ggml_tensor * conv1d = nullptr; struct ggml_tensor * conv1d_b = nullptr; + // gemma3n altup + struct ggml_tensor * tok_embd_per_layer = nullptr; + struct ggml_tensor * altup_proj = nullptr; + struct ggml_tensor * altup_unembd_proj = nullptr; + struct ggml_tensor * per_layer_model_proj = nullptr; + struct ggml_tensor * per_layer_proj_norm = nullptr; + std::vector layers; llama_model_params params; @@ -411,10 +454,7 @@ struct llama_model { llama_memory_i * create_memory(const llama_memory_params & params, llama_cparams & cparams) const; // TODO: move this to new llm_arch_model_i interface - llm_graph_result_ptr build_graph( - const llm_graph_params & params, - ggml_cgraph * gf, - llm_graph_type type) const; + ggml_cgraph * build_graph(const llm_graph_params & params) 
const; private: struct impl; diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index 159b1307a4c5d..a00af7a1d1758 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -1,5 +1,4 @@ #include "llama-quant.h" - #include "llama-impl.h" #include "llama-model.h" #include "llama-model-loader.h" @@ -27,6 +26,56 @@ static void zeros(std::ofstream & file, size_t n) { } } +static std::string remap_layer(const std::string & orig_name, const std::vector & prune, std::map & mapped, int & next_id) { + if (prune.empty()) { + return orig_name; + } + + static const std::regex pattern(R"(blk\.(\d+)\.)"); + if (std::smatch match; std::regex_search(orig_name, match, pattern)) { + const int blk = std::stoi(match[1]); + std::string new_name = orig_name; + + if (mapped.count(blk)) { + // Already mapped, do nothing + } else if (std::find(prune.begin(), prune.end(), blk) != prune.end()) { + mapped[blk] = ""; + } else if (blk < prune.front()) { + mapped[blk] = std::to_string(blk); + next_id = blk + 1; + } else { + mapped[blk] = std::to_string(next_id); + ++next_id; + } + + return mapped[blk].empty() ? 
mapped[blk] : new_name.replace(match.position(1), match.length(1), mapped[blk]); + } + + return orig_name; +} + +static std::string remap_imatrix (const std::string & orig_name, const std::map & mapped) { + if (mapped.empty()) { + return orig_name; + } + + static const std::regex pattern(R"(blk\.(\d+)\.)"); + if (std::smatch match; std::regex_search(orig_name, match, pattern)) { + const std::string blk(match[1]); + std::string new_name = orig_name; + + for (const auto & p : mapped) { + if (p.second == blk) { + LLAMA_LOG_DEBUG("(blk.%d imatrix) ", p.first); + return new_name.replace(match.position(1), match.length(1), std::to_string(p.first)); + } + } + GGML_ABORT("\n%s: imatrix mapping error for %s\n", __func__, orig_name.c_str()); + } + + return orig_name; +} + struct quantize_state_impl { const llama_model & model; const llama_model_quantize_params * params; @@ -174,7 +223,7 @@ static ggml_type llama_tensor_get_type(quantize_state_impl & qs, ggml_type new_t new_type = GGML_TYPE_Q6_K; } } - } else if (name == "token_embd.weight") { + } else if (name == "token_embd.weight" || name == "per_layer_token_embd.weight") { if (qs.params->token_embedding_type < GGML_TYPE_COUNT) { new_type = qs.params->token_embedding_type; } else { @@ -568,6 +617,11 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: const size_t align = GGUF_DEFAULT_ALIGNMENT; gguf_context_ptr ctx_out { gguf_init_empty() }; + std::vector prune_list = {}; + if (params->prune_layers) { + prune_list = *static_cast *>(params->prune_layers); + } + // copy the KV pairs from the input file gguf_set_kv (ctx_out.get(), ml.meta.get()); gguf_set_val_u32(ctx_out.get(), "general.quantization_version", GGML_QNT_VERSION); // TODO: use LLM_KV @@ -585,7 +639,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: if (o.tag == LLAMA_KV_OVERRIDE_TYPE_FLOAT) { gguf_set_val_f32(ctx_out.get(), o.key, o.val_f64); } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_INT) { - 
gguf_set_val_i32(ctx_out.get(), o.key, o.val_i64); + // Setting type to UINT32. See https://github.com/ggml-org/llama.cpp/pull/14182 for context + gguf_set_val_u32(ctx_out.get(), o.key, (uint32_t)abs(o.val_i64)); } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_BOOL) { gguf_set_val_bool(ctx_out.get(), o.key, o.val_bool); } else if (o.tag == LLAMA_KV_OVERRIDE_TYPE_STR) { @@ -596,12 +651,32 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: } } + std::map mapped; + int blk_id = 0; + int pruned_attention_w = 0; + // make a list of weights std::vector tensors; tensors.reserve(ml.weights_map.size()); for (const auto & it : ml.weights_map) { + const std::string remapped_name(remap_layer(it.first, prune_list, mapped, blk_id)); + if (remapped_name.empty()) { + if (it.first.find("attn_v.weight") != std::string::npos || + it.first.find("attn_qkv.weight") != std::string::npos || + it.first.find("attn_kv_b.weight") != std::string::npos) { + pruned_attention_w++; + } + LLAMA_LOG_DEBUG("%s: pruning tensor %s\n", __func__, it.first.c_str()); + continue; + } else if (remapped_name != it.first) { + ggml_set_name(it.second.tensor, remapped_name.c_str()); + LLAMA_LOG_DEBUG("%s: tensor %s remapped to %s\n", __func__, it.first.c_str(), ggml_get_name(it.second.tensor)); + } tensors.push_back(&it.second); } + if (!prune_list.empty()) { + gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_BLOCK_COUNT).c_str(), blk_id); + } // keep_split requires that the weights are sorted by split index if (params->keep_split) { @@ -639,7 +714,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: if (llama_model_has_encoder(&model)) { n_attn_layer *= 3; } - GGML_ASSERT((qs.n_attention_wv == n_attn_layer) && "n_attention_wv is unexpected"); + GGML_ASSERT((qs.n_attention_wv == n_attn_layer - pruned_attention_w) && "n_attention_wv is unexpected"); } size_t total_size_org = 0; @@ -680,7 +755,7 @@ static void llama_model_quantize_impl(const std::string 
& fname_inp, const std:: for (size_t i = 0; i < ctx_outs.size(); ++i) { gguf_set_val_u16(ctx_outs[i].get(), ml.llm_kv(LLM_KV_SPLIT_NO).c_str(), i); gguf_set_val_u16(ctx_outs[i].get(), ml.llm_kv(LLM_KV_SPLIT_COUNT).c_str(), n_split); - gguf_set_val_i32(ctx_outs[i].get(), ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str(), ml.n_tensors); + gguf_set_val_i32(ctx_outs[i].get(), ml.llm_kv(LLM_KV_SPLIT_TENSORS_COUNT).c_str(), (int32_t)tensors.size()); } } @@ -755,6 +830,13 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: // NOTE: can't use LLM_TN here because the layer number is not known quantize &= name.find("ffn_gate_inp.weight") == std::string::npos; + // these are very small (e.g. 4x4) + quantize &= name.find("altup") == std::string::npos; + quantize &= name.find("laurel") == std::string::npos; + + // these are not too big so keep them as it is + quantize &= name.find("per_layer_model_proj") == std::string::npos; + // do not quantize positional embeddings and token types (BERT) quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_POS_EMBD, "weight"); quantize &= name != LLM_TN(model.arch)(LLM_TENSOR_TOKEN_TYPES, "weight"); @@ -762,6 +844,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: // do not quantize Mamba's small yet 2D weights // NOTE: can't use LLM_TN here because the layer number is not known quantize &= name.find("ssm_conv1d.weight") == std::string::npos; + quantize &= name.find("shortconv.conv.weight") == std::string::npos; // do not quantize RWKV's small yet 2D weights quantize &= name.find("time_mix_first.weight") == std::string::npos; @@ -801,8 +884,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: if (std::regex pattern(tname); std::regex_search(tensor_name, pattern)) { if (qtype != new_type) { LLAMA_LOG_DEBUG("(overriding %s) ", ggml_type_name(new_type)); - new_type = qtype; - break; // if two or more types are specified for the tensor, first match wins 
+ new_type = qtype; // if two or more types are specified for the same tensor, the last match wins } } } @@ -831,7 +913,7 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: const float * imatrix = nullptr; if (imatrix_data) { - auto it = imatrix_data->find(tensor->name); + auto it = imatrix_data->find(remap_imatrix(tensor->name, mapped)); if (it == imatrix_data->end()) { LLAMA_LOG_INFO("\n====== %s: did not find weights for %s\n", __func__, tensor->name); } else { @@ -946,6 +1028,7 @@ llama_model_quantize_params llama_model_quantize_default_params() { /*.imatrix =*/ nullptr, /*.kv_overrides =*/ nullptr, /*.tensor_type =*/ nullptr, + /*.prune_layers =*/ nullptr }; return result; diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp index 905d7c4281d9c..2181c01e31a87 100644 --- a/src/llama-vocab.cpp +++ b/src/llama-vocab.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -351,6 +352,7 @@ struct llm_tokenizer_bpe : llm_tokenizer { break; case LLAMA_VOCAB_PRE_TYPE_STABLELM2: case LLAMA_VOCAB_PRE_TYPE_QWEN2: + case LLAMA_VOCAB_PRE_TYPE_HUNYUAN: regex_exprs = { // original regex from tokenizer.json // "(?i:'s|'t|'re|'ve|'m|'ll|'d)|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+" @@ -403,6 +405,13 @@ struct llm_tokenizer_bpe : llm_tokenizer { "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])?|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+", }; break; + case LLAMA_VOCAB_PRE_TYPE_KIMI_K2: + regex_exprs = { + // K2 trigger pattern - this will activate the custom K2 handler in unicode.cpp + // The custom handler implements all K2 patterns with proper Han character exclusion + "\\p{Han}+", + }; + break; case 
LLAMA_VOCAB_PRE_TYPE_SUPERBPE: regex_exprs = { "\\p{N}+", @@ -1195,6 +1204,284 @@ struct llm_tokenizer_rwkv_session { const llm_tokenizer_rwkv & tokenizer; }; +struct llm_tokenizer_plamo2 : llm_tokenizer { + llm_tokenizer_plamo2(const llama_vocab & vocab) { + build(vocab); + } + + void build(const llama_vocab & vocab) { + // Reset internal structures + tokens_.clear(); + bytes_.assign(256, 0); + to_suffix_id_.clear(); + table_.clear(); + + // Build token list and byte mapping + std::unordered_map suffix_to_score; + std::unordered_map token_to_id; + + for (size_t token_id = 0; token_id < vocab.n_tokens(); ++token_id) { + const auto & entry = vocab.get_token_data(token_id); + tokens_.push_back(entry.text); + token_to_id[entry.text] = static_cast(token_id); + + // Handle byte tokens + if (vocab.is_byte(token_id)) { + if (entry.text.length() == 6 && entry.text.substr(0, 3) == "<0x" && entry.text.back() == '>') { + std::string hex_str = entry.text.substr(3, 2); + int byte_val = std::stoi(hex_str, nullptr, 16); + bytes_[byte_val] = static_cast(token_id); + } + continue; + } + + // Add token and all its suffixes to suffix_to_score + suffix_to_score[entry.text] = entry.score; + + // Extract suffixes character by character (UTF-8 aware) + std::vector cpts = unicode_cpts_from_utf8(entry.text); + for (size_t i = 1; i < cpts.size(); ++i) { + std::string suffix; + for (size_t j = i; j < cpts.size(); ++j) { + suffix += unicode_cpt_to_utf8(cpts[j]); + } + if (suffix_to_score.find(suffix) == suffix_to_score.end()) { + suffix_to_score[suffix] = std::numeric_limits::quiet_NaN(); + } + } + } + + // Check that all byte tokens are set + for (int i = 0; i < 256; ++i) { + if (bytes_[i] == 0) { + throw std::runtime_error("Byte token for <0x" + std::to_string(i) + "> is not set"); + } + } + + // Build suffix list in lexicographical order of reversed strings + std::vector suffixes; + for (const auto & pair : suffix_to_score) { + suffixes.push_back(pair.first); + } + suffixes.push_back(""); 
// Empty suffix + + std::sort(suffixes.begin(), suffixes.end(), [](const std::string & a, const std::string & b) { + std::string rev_a(a.rbegin(), a.rend()); + std::string rev_b(b.rbegin(), b.rend()); + return rev_a < rev_b; + }); + + // Build suffix_to_id and to_suffix_id_ + std::unordered_map suffix_to_id; + int32_t num_pieces = 0; + + for (const auto & suffix : suffixes) { + suffix_to_id[suffix] = num_pieces; + if (!suffix.empty()) { + std::vector cpts = unicode_cpts_from_utf8(suffix); + + std::string remaining; + for (size_t i = 1; i < cpts.size(); ++i) { + remaining += unicode_cpt_to_utf8(cpts[i]); + } + + int64_t piece_code = (static_cast(cpts[0]) << 32) | suffix_to_id[remaining]; + to_suffix_id_[piece_code] = num_pieces; + + // Count number of pieces for this suffix + int32_t pieces_for_suffix = 1; // sentinel row + for (int32_t piece_length = static_cast(cpts.size()); piece_length > 0; --piece_length) { + std::string piece; + for (int32_t i = 0; i < piece_length; ++i) { + piece += unicode_cpt_to_utf8(cpts[i]); + } + if (suffix_to_score.find(piece) != suffix_to_score.end()) { + pieces_for_suffix++; + } + } + num_pieces += pieces_for_suffix; + } else { + num_pieces++; // Empty suffix contributes one piece (sentinel row) + } + } + + // Build flattened table + table_.resize(num_pieces, std::vector(4, 0)); + int32_t table_idx = 0; + + for (const auto & suffix : suffixes) { + // Add all prefixes of the suffix to the table (in decreasing order of length) + std::vector cpts = unicode_cpts_from_utf8(suffix); + for (int32_t piece_length = static_cast(cpts.size()); piece_length > 0; --piece_length) { + std::string piece; + for (int32_t i = 0; i < piece_length; ++i) { + piece += unicode_cpt_to_utf8(cpts[i]); + } + + auto score_it = suffix_to_score.find(piece); + if (score_it == suffix_to_score.end()) { + continue; + } + + table_[table_idx][TABLE_PIECE_LENGTH] = piece_length; + auto token_it = token_to_id.find(piece); + table_[table_idx][TABLE_TOKEN_ID] = (token_it != 
token_to_id.end()) ? token_it->second : -1; + + float score = score_it->second; + table_[table_idx][TABLE_SCORE] = std::isfinite(score) ? + static_cast(std::round(score * 1e4)) : INVALID_SCORE; + table_[table_idx][TABLE_PIECE_ID] = suffix_to_id[piece]; + + table_idx++; + } + + // Add sentinel row + table_[table_idx][TABLE_PIECE_LENGTH] = 1; + table_[table_idx][TABLE_TOKEN_ID] = -1; + table_[table_idx][TABLE_SCORE] = UNKNOWN_SCORE; + table_idx++; + } + } + + std::vector encode(const std::string & text) const { + std::vector unicode_data = unicode_cpts_from_utf8(text); + // Skip the first code point if it is a BOM (Byte Order Mark) + if (!unicode_data.empty() && unicode_data[0] == 0xFEFF) { + unicode_data.erase(unicode_data.begin()); + } + + if (unicode_data.empty()) { + return {}; + } + + const size_t data_len = unicode_data.size(); + + // Initialize scores array (dynamic programming) + std::vector scores(data_len + 1, static_cast(1) << 60); + scores[data_len] = 0; + + // Path array to track best tokenization + std::vector> path(data_len + 1, std::vector(3, 0)); + + int32_t suffix_id = 0; + + // Process from end to beginning + for (int i = static_cast(data_len) - 1; i >= 0; --i) { + uint32_t c = unicode_data[i]; + + // Find next suffix ID + for (size_t p = suffix_id; p < table_.size(); ++p) { + int64_t piece_code = (static_cast(c) << 32) | table_[p][TABLE_PIECE_ID]; + auto it = to_suffix_id_.find(piece_code); + suffix_id = (it != to_suffix_id_.end()) ? 
it->second : 0; + + if (suffix_id > 0 || table_[p][TABLE_SCORE] == UNKNOWN_SCORE) { + break; + } + } + + // Update best path + for (size_t p = suffix_id; p < table_.size(); ++p) { + int32_t score = table_[p][TABLE_SCORE]; + if (score > INVALID_SCORE) { + int32_t piece_length = table_[p][TABLE_PIECE_LENGTH]; + int64_t s = scores[i + piece_length] - score; + + if (s < scores[i]) { + scores[i] = s; + path[i][PATH_TOKEN_LENGTH] = piece_length; + path[i][PATH_TOKEN_ID] = table_[p][TABLE_TOKEN_ID]; + path[i][PATH_NUM_TOKENS] = path[i + piece_length][PATH_NUM_TOKENS] + 1; + + if (score == UNKNOWN_SCORE) { + // Add UTF-8 byte count + path[i][PATH_NUM_TOKENS] += (c >= 0x80) + (c >= 0x800) + (c >= 0x10000); + } + } + } + + if (score == UNKNOWN_SCORE) { + break; + } + } + } + + // Decode the best path + std::vector token_ids; + token_ids.reserve(path[0][PATH_NUM_TOKENS]); + + int pos = 0; + while (pos < static_cast(data_len)) { + if (path[pos][PATH_TOKEN_ID] >= 0) { + token_ids.push_back(path[pos][PATH_TOKEN_ID]); + } else { + // Fall back to byte tokens + uint32_t c = unicode_data[pos]; + int s = 1 + (c >= 0x80) + (c >= 0x800) + (c >= 0x10000); + + for (int i = 0; i < s; ++i) { + uint8_t b; + if (s == 1) { + b = c; + } else { + if (i == 0) { + b = (0xF00 >> s) & 0xFF; + } else { + b = 0x80; + } + } + token_ids.push_back(bytes_[b | ((c >> ((s - i - 1) * 6)) & 0x3F)]); + } + } + + assert(path[pos][PATH_TOKEN_LENGTH] > 0); + pos += path[pos][PATH_TOKEN_LENGTH]; + } + + return token_ids; + } +private: + // Constants for table structure + static constexpr int32_t TABLE_PIECE_LENGTH = 0; + static constexpr int32_t TABLE_TOKEN_ID = 1; + static constexpr int32_t TABLE_SCORE = 2; + static constexpr int32_t TABLE_PIECE_ID = 3; + + // Constants for path array + static constexpr int32_t PATH_TOKEN_LENGTH = 0; + static constexpr int32_t PATH_TOKEN_ID = 1; + static constexpr int32_t PATH_NUM_TOKENS = 2; + + // Score constants + static constexpr int32_t INVALID_SCORE = -20000000; + static 
constexpr int32_t UNKNOWN_SCORE = -10000000; + + // List of tokens in the vocabulary + std::vector tokens_; + + // Mapping from byte code point to token ID (for byte fallback) + std::vector bytes_; + + // Mapping from piece code to suffix ID + std::unordered_map to_suffix_id_; + + // Flattened table representing the Trie structure + // Each row contains: [piece_length, token_id, score, piece_id] + std::vector> table_; +}; + +struct llm_tokenizer_plamo2_session { + llm_tokenizer_plamo2_session(const llm_tokenizer_plamo2 & tokenizer) : tokenizer(tokenizer) {} + + void tokenize(const std::string & text, std::vector & output) { + std::vector tokens = tokenizer.encode(text); + output.insert(output.end(), tokens.begin(), tokens.end()); + } + +private: + const llm_tokenizer_plamo2 & tokenizer; +}; + // // impl // @@ -1269,6 +1556,7 @@ struct llama_vocab::impl { bool add_space_prefix = false; bool add_bos = false; bool add_eos = false; + bool add_sep = false; bool ignore_merges = false; bool clean_spaces = false; // clean_up_tokenization_spaces bool remove_extra_whitespaces = false; @@ -1421,6 +1709,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_sep_id = 102; special_pad_id = 0; special_mask_id = 103; + + add_sep = true; } else if (tokenizer_model == "gpt2") { type = LLAMA_VOCAB_TYPE_BPE; @@ -1495,6 +1785,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { special_unk_id = LLAMA_TOKEN_NULL; special_sep_id = LLAMA_TOKEN_NULL; special_pad_id = LLAMA_TOKEN_NULL; + } else if (tokenizer_model == "plamo2") { + type = LLAMA_VOCAB_TYPE_PLAMO2; + + // PLaMo-2 default special tokens (these will be overridden by model config) + special_bos_id = 1; // <|plamo:bos|> + special_eos_id = 2; // <|plamo:eos|> + special_unk_id = 0; // <|plamo:unk|> + special_sep_id = LLAMA_TOKEN_NULL; + special_pad_id = 3; // <|plamo:pad|> + special_mask_id = LLAMA_TOKEN_NULL; } else { throw std::runtime_error(format("unknown tokenizer: 
'%s'", tokenizer_model.c_str())); } @@ -1519,7 +1819,10 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { tokenizer_pre == "llama-v3" || tokenizer_pre == "llama-bpe"|| tokenizer_pre == "falcon3" || - tokenizer_pre == "pixtral") { + tokenizer_pre == "falcon-h1" || + tokenizer_pre == "pixtral" || + tokenizer_pre == "midm-2.0" || + tokenizer_pre == "lfm2") { pre_type = LLAMA_VOCAB_PRE_TYPE_LLAMA3; ignore_merges = true; add_bos = true; @@ -1550,12 +1853,16 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { tokenizer_pre == "jina-es" || tokenizer_pre == "jina-de" || tokenizer_pre == "gigachat" || - tokenizer_pre == "jina-v1-en" || tokenizer_pre == "jina-v2-es" || tokenizer_pre == "jina-v2-de" || + tokenizer_pre == "a.x-4.0") { + pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2; + } else if ( + tokenizer_pre == "jina-v1-en" || tokenizer_pre == "jina-v2-code" || tokenizer_pre == "roberta-bpe") { pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2; + add_sep = true; } else if ( tokenizer_pre == "refact") { pre_type = LLAMA_VOCAB_PRE_TYPE_REFACT; @@ -1650,6 +1957,14 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { tokenizer_pre == "seed-coder") { pre_type = LLAMA_VOCAB_PRE_TYPE_SEED_CODER; clean_spaces = false; + } else if ( + tokenizer_pre == "hunyuan") { + pre_type = LLAMA_VOCAB_PRE_TYPE_HUNYUAN; + clean_spaces = false; + } else if ( + tokenizer_pre == "kimi-k2") { + pre_type = LLAMA_VOCAB_PRE_TYPE_KIMI_K2; + clean_spaces = false; } else { throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str())); } @@ -1665,6 +1980,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { clean_spaces = true; add_bos = true; add_eos = false; + add_sep = true; } else if (type == LLAMA_VOCAB_TYPE_UGM) { pre_type = LLAMA_VOCAB_PRE_TYPE_DEFAULT; add_bos = false; @@ -1801,7 +2117,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { } } - // Handle add_bos 
and add_eos + // Handle add_bos, add_eos and add_sep { bool temp = true; @@ -1811,6 +2127,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { if (ml.get_key(LLM_KV_TOKENIZER_ADD_EOS, temp, false)) { add_eos = temp; } + if (ml.get_key(LLM_KV_TOKENIZER_ADD_SEP, temp, false)) { + add_sep = temp; + } } // auto-detect special tokens by text @@ -1829,6 +2148,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "" || t.first == "_" || t.first == "<|end▁of▁sentence|>" // DeepSeek + || t.first == "" // smoldocling ) { special_eot_id = t.second; if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { @@ -1987,6 +2307,8 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { || t.first == "<|eom_id|>" || t.first == "" || t.first == "_" + || t.first == "<|end_of_text|>" + || t.first == "" // smoldocling ) { special_eog_ids.insert(t.second); if ((id_to_token[t.second].attr & LLAMA_TOKEN_ATTR_CONTROL) == 0) { @@ -2059,9 +2381,9 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) { //NOTE: Per token attributes are missing from the GGUF file. //TODO: Extract attributes from GGUF file. 
{ - auto _contains_any = [] (const std::string & str, const std::vector & substrs) -> bool { + auto _contains_any = [] (const std::string & str, const std::vector & substrs) -> bool { for (const auto & substr : substrs) { - if (str.find(substr) < std::string::npos) { + if (str.find(substr) != std::string::npos) { return true; } } @@ -2123,13 +2445,14 @@ enum llama_vocab_type llama_vocab::impl::get_type() const { std::string llama_vocab::impl::type_name() const{ switch (type) { - case LLAMA_VOCAB_TYPE_NONE: return "no vocab"; - case LLAMA_VOCAB_TYPE_SPM: return "SPM"; - case LLAMA_VOCAB_TYPE_BPE: return "BPE"; - case LLAMA_VOCAB_TYPE_WPM: return "WPM"; - case LLAMA_VOCAB_TYPE_UGM: return "UGM"; - case LLAMA_VOCAB_TYPE_RWKV: return "RWKV"; - default: return "unknown"; + case LLAMA_VOCAB_TYPE_NONE: return "no vocab"; + case LLAMA_VOCAB_TYPE_SPM: return "SPM"; + case LLAMA_VOCAB_TYPE_BPE: return "BPE"; + case LLAMA_VOCAB_TYPE_WPM: return "WPM"; + case LLAMA_VOCAB_TYPE_UGM: return "UGM"; + case LLAMA_VOCAB_TYPE_RWKV: return "RWKV"; + case LLAMA_VOCAB_TYPE_PLAMO2: return "PLaMo2"; + default: return "unknown"; } } @@ -2212,6 +2535,9 @@ void llama_vocab::impl::init_tokenizer(enum llama_vocab_type type) { case LLAMA_VOCAB_TYPE_RWKV: tokenizer = std::make_unique(vocab); break; + case LLAMA_VOCAB_TYPE_PLAMO2: + tokenizer = std::make_unique(vocab); + break; default: GGML_ABORT("unsupported vocab type"); } @@ -2544,6 +2870,23 @@ std::vector llama_vocab::impl::tokenize( if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { std::string text = fragment.raw_text.substr(fragment.offset, fragment.length); +#ifdef PRETOKENIZERDEBUG + LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str()); +#endif + + session.tokenize(text, output); + } else { // if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_TOKEN) + output.push_back(fragment.token); + } + } + } break; + case LLAMA_VOCAB_TYPE_PLAMO2: + { + llm_tokenizer_plamo2_session 
session(*static_cast(tokenizer.get())); + for (const auto & fragment : fragment_buffer) { + if (fragment.type == FRAGMENT_BUFFER_VARIANT_TYPE_RAW_TEXT) { + std::string text = fragment.raw_text.substr(fragment.offset, fragment.length); + #ifdef PRETOKENIZERDEBUG LLAMA_LOG_WARN("TT: (%ld %ld %ld) '%s'\n", text.length(), fragment.offset, fragment.length, text.c_str()); #endif @@ -2642,6 +2985,24 @@ int32_t llama_vocab::impl::token_to_piece(llama_token token, char * buf, int32_t memcpy(buf, result.data(), result.size()); return (int)result.size(); } + case LLAMA_VOCAB_TYPE_PLAMO2: { + // PLaMo-2 uses similar token handling as BPE/SPM + if (vocab.is_byte(token)) { + // Handle byte tokens like <0xXX> + if (token_text.length() == 6 && token_text.substr(0, 3) == "<0x" && token_text.back() == '>') { + int hex_val = std::stoi(token_text.substr(3, 2), nullptr, 16); + if (length < 1) { + return -1; + } + buf[0] = static_cast(hex_val); + return 1; + } + } + + // Normal token - just copy the text + std::string result = token_text; + return _try_copy(result.data(), result.size()); + } default: GGML_ABORT("fatal error"); } @@ -2886,6 +3247,12 @@ llama_token llama_vocab::byte_to_token(uint8_t ch) const { case LLAMA_VOCAB_TYPE_BPE: { return pimpl->token_to_id.at(unicode_byte_to_utf8(ch)); } + case LLAMA_VOCAB_TYPE_PLAMO2: { + // PLaMo-2 uses byte tokens in format <0xXX> + char hex_str[8]; + snprintf(hex_str, sizeof(hex_str), "<0x%02X>", ch); + return pimpl->token_to_id.at(hex_str); + } default: GGML_ABORT("fatal error"); } @@ -2987,6 +3354,10 @@ llama_token llama_vocab::token_fim_sep() const { return pimpl->special_fim_sep_id; } +llama_token llama_vocab::token_mask() const { + return pimpl->special_mask_id; +} + bool llama_vocab::get_add_space_prefix() const { return pimpl->add_space_prefix; } @@ -2999,6 +3370,10 @@ bool llama_vocab::get_add_eos() const { return pimpl->add_eos; } +bool llama_vocab::get_add_sep() const { + return pimpl->add_sep; +} + bool 
llama_vocab::get_ignore_merges() const { return pimpl->ignore_merges; } @@ -3059,6 +3434,11 @@ int32_t llama_vocab::tokenize( bool add_special, bool parse_special) const { auto res = tokenize(std::string(text, text_len), add_special, parse_special); + if (res.size() >= static_cast(std::numeric_limits::max())) { + LLAMA_LOG_ERROR("%s: tokenization result size %zu exceeds int32_t limit\n", __func__, res.size()); + return std::numeric_limits::min(); + } + if (n_tokens_max < (int) res.size()) { // LLAMA_LOG_ERROR("%s: too many tokens\n", __func__); return -((int) res.size()); @@ -3190,6 +3570,10 @@ bool llama_vocab_get_add_eos(const struct llama_vocab * vocab) { return vocab->get_add_eos(); } +bool llama_vocab_get_add_sep(const struct llama_vocab * vocab) { + return vocab->get_add_sep(); +} + llama_token llama_vocab_fim_pre(const struct llama_vocab * vocab) { return vocab->token_fim_pre(); } @@ -3214,6 +3598,10 @@ llama_token llama_vocab_fim_sep(const struct llama_vocab * vocab) { return vocab->token_fim_sep(); } +llama_token llama_vocab_mask(const struct llama_vocab* vocab) { + return vocab->token_mask(); +} + // deprecated const char * llama_token_get_text(const struct llama_vocab * vocab, llama_token token) { return llama_vocab_get_text(vocab, token); @@ -3350,4 +3738,3 @@ int32_t llama_detokenize( bool unparse_special) { return vocab->detokenize(tokens, n_tokens, text, text_len_max, remove_special, unparse_special); } - diff --git a/src/llama-vocab.h b/src/llama-vocab.h index daa6cf3082f90..842b129e86171 100644 --- a/src/llama-vocab.h +++ b/src/llama-vocab.h @@ -6,6 +6,48 @@ #include #include +// pre-tokenization types +enum llama_vocab_pre_type { + LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0, + LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1, + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2, + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3, + LLAMA_VOCAB_PRE_TYPE_FALCON = 4, + LLAMA_VOCAB_PRE_TYPE_MPT = 5, + LLAMA_VOCAB_PRE_TYPE_STARCODER = 6, + LLAMA_VOCAB_PRE_TYPE_GPT2 = 7, + LLAMA_VOCAB_PRE_TYPE_REFACT 
= 8, + LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9, + LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10, + LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11, + LLAMA_VOCAB_PRE_TYPE_OLMO = 12, + LLAMA_VOCAB_PRE_TYPE_DBRX = 13, + LLAMA_VOCAB_PRE_TYPE_SMAUG = 14, + LLAMA_VOCAB_PRE_TYPE_PORO = 15, + LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16, + LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17, + LLAMA_VOCAB_PRE_TYPE_VIKING = 18, + LLAMA_VOCAB_PRE_TYPE_JAIS = 19, + LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20, + LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21, + LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, + LLAMA_VOCAB_PRE_TYPE_BLOOM = 23, + LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24, + LLAMA_VOCAB_PRE_TYPE_EXAONE = 25, + LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26, + LLAMA_VOCAB_PRE_TYPE_MINERVA = 27, + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28, + LLAMA_VOCAB_PRE_TYPE_GPT4O = 29, + LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30, + LLAMA_VOCAB_PRE_TYPE_TRILLION = 31, + LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32, + LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33, + LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34, + LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35, + LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36, + LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37, +}; + struct LLM_KV; struct llama_model_loader; @@ -59,6 +101,7 @@ struct llama_vocab { llama_token token_sep() const; llama_token token_nl () const; llama_token token_pad() const; + llama_token token_mask() const; llama_token token_prefix() const; llama_token token_middle() const; @@ -74,6 +117,7 @@ struct llama_vocab { bool get_add_space_prefix () const; bool get_add_bos () const; bool get_add_eos () const; + bool get_add_sep () const; bool get_ignore_merges () const; bool get_clean_spaces () const; bool get_remove_extra_whitespaces () const; diff --git a/src/llama.cpp b/src/llama.cpp index 2f06e0f8ce12d..34906cdb62844 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -198,14 +198,18 @@ static struct llama_model * llama_model_load_from_file_impl( // if using single GPU mode, remove all except the main GPU if (params.split_mode == LLAMA_SPLIT_MODE_NONE) { - if (params.main_gpu < 0 || params.main_gpu >= 
(int)model->devices.size()) { - LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %d)\n", __func__, params.main_gpu, (int)model->devices.size()); - llama_model_free(model); - return nullptr; + if (params.main_gpu < 0) { + model->devices.clear(); + } else { + if (params.main_gpu >= (int)model->devices.size()) { + LLAMA_LOG_ERROR("%s: invalid value for main_gpu: %d (available devices: %zu)\n", __func__, params.main_gpu, model->devices.size()); + llama_model_free(model); + return nullptr; + } + ggml_backend_dev_t main_gpu = model->devices[params.main_gpu]; + model->devices.clear(); + model->devices.push_back(main_gpu); } - ggml_backend_dev_t main_gpu = model->devices[params.main_gpu]; - model->devices.clear(); - model->devices.push_back(main_gpu); } for (auto * dev : model->devices) { diff --git a/src/unicode.cpp b/src/unicode.cpp index e63bb4ab085d6..65f3665171582 100644 --- a/src/unicode.cpp +++ b/src/unicode.cpp @@ -204,12 +204,17 @@ static inline std::wstring unicode_wstring_from_utf8(const std::string & s) { // disable C++17 deprecation warning for std::codecvt_utf8 # pragma clang diagnostic push # pragma clang diagnostic ignored "-Wdeprecated-declarations" +#elif defined(__GNUC__) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wdeprecated-declarations" #endif std::wstring_convert> conv; #if defined(__clang__) # pragma clang diagnostic pop +#elif defined(__GNUC__) +# pragma GCC diagnostic pop #endif return conv.from_bytes(s); @@ -552,6 +557,178 @@ static std::vector unicode_regex_split_stl(const std::string & text, con return bpe_offsets; } +// K2 system regex patterns (from tokenization_kimi.py): +// [\p{Han}]+|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?i:'s|'t|'re|'ve|'m|'ll|'d)?|[^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?i:'s|'t|'re|'ve|'m|'ll|'d)?|\p{N}{1,3}| 
?[^\s\p{L}\p{N}]+[\r\n]*|\s*[\r\n]+|\s+(?!\S)|\s+ +static std::vector unicode_regex_split_custom_kimi_k2(const std::string & text, const std::vector & offsets) { + std::vector bpe_offsets; + bpe_offsets.reserve(offsets.size()); + + const auto cpts = unicode_cpts_from_utf8(text); + + size_t start = 0; + for (auto offset : offsets) { + const size_t offset_ini = start; + const size_t offset_end = start + offset; + assert(offset_end <= cpts.size()); + start = offset_end; + + static const uint32_t OUT_OF_RANGE = 0xFFFFFFFF; + auto _get_cpt = [&] (const size_t pos) -> uint32_t { + return (offset_ini <= pos && pos < offset_end) ? cpts[pos] : OUT_OF_RANGE; + }; + + auto _get_flags = [&] (const size_t pos) -> unicode_cpt_flags { + return (offset_ini <= pos && pos < offset_end) ? unicode_cpt_flags_from_cpt(cpts[pos]) : unicode_cpt_flags{}; + }; + + size_t _prev_end = offset_ini; + auto _add_token = [&] (const size_t end) -> size_t { + assert(_prev_end <= end && end <= offset_end); + size_t len = end - _prev_end; + if (len > 0) { + bpe_offsets.push_back(len); + } + _prev_end = end; + return len; + }; + + for (size_t pos = offset_ini; pos < offset_end; /*pos++*/ ) { + const uint32_t cpt = _get_cpt(pos); + const auto flags = _get_flags(pos); + + // Pattern 1: [\p{Han}]+ (Chinese characters) + if (unicode_cpt_is_han(cpt)) { + while (unicode_cpt_is_han(_get_cpt(pos))) { + pos++; + } + _add_token(pos); + continue; + } + + // Pattern 2 & 3: Letter words excluding Han characters with optional contractions + // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)? + // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)? 
+ // Check if current char is a letter OR if current char could be a leading char and next char is a letter + bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) || + (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) && + _get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1))); + + if (is_letter_pattern) { + // Handle optional leading non-letter/non-number character + bool has_leading_char = false; + if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) { + has_leading_char = true; + pos++; + } + + // Match letter sequence (excluding Han characters) + bool has_letters = false; + while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) { + has_letters = true; + pos++; + } + + // Only proceed if we found letters (after potentially skipping leading char) + if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) { + if (!has_letters) pos++; // consume the first letter if we didn't already + + // Continue consuming letters + while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) { + pos++; + } + + // Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d) + if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) { + uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1)); + if (cpt_next == 's' || cpt_next == 't' || cpt_next == 'm' || cpt_next == 'd') { + pos += 2; + } else if (pos + 2 < offset_end) { + uint32_t cpt_next_next = unicode_tolower(_get_cpt(pos + 2)); + if ((cpt_next == 'r' && cpt_next_next == 'e') || + (cpt_next == 'v' && cpt_next_next == 'e') || + (cpt_next == 'l' && cpt_next_next == 'l')) { + pos += 3; + } + } + } + + _add_token(pos); + continue; + } else if (has_leading_char) { + // We consumed a leading char but found no letters, backtrack + pos--; + } + } + + // Pattern 4: \p{N}{1,3} (numbers 1-3 digits) + if (flags.is_number) { + size_t ini = pos; + while (_get_flags(pos).is_number) { + if (++pos - 
ini >= 3) { + _add_token(pos); + ini = pos; + } + } + _add_token(pos); + continue; + } + + // Pattern 5: ?[^\s\p{L}\p{N}]+[\r\n]* (optional space + non-word chars + optional newlines) + auto flags2 = (cpt == ' ' ? _get_flags(pos + 1) : flags); + if (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) { + pos += (cpt == ' '); + while (!(flags2.is_whitespace || flags2.is_letter || flags2.is_number) && flags2.as_uint()) { + flags2 = _get_flags(++pos); + } + // Match optional [\r\n]* + uint32_t cpt2 = _get_cpt(pos); + while (cpt2 == '\r' || cpt2 == '\n') { + cpt2 = _get_cpt(++pos); + } + _add_token(pos); + continue; + } + + // Count whitespace characters + size_t num_whitespaces = 0; + size_t last_end_r_or_n = 0; + while (_get_flags(pos + num_whitespaces).is_whitespace) { + uint32_t cpt2 = _get_cpt(pos + num_whitespaces); + if (cpt2 == '\r' || cpt2 == '\n') { + last_end_r_or_n = pos + num_whitespaces + 1; + } + num_whitespaces++; + } + + // Pattern 6: \s*[\r\n]+ (whitespace with newlines) + if (last_end_r_or_n > 0) { + pos = last_end_r_or_n; + _add_token(pos); + continue; + } + + // Pattern 7: \s+(?!\S) (trailing whitespace) + if (num_whitespaces > 1 && _get_cpt(pos + num_whitespaces) != OUT_OF_RANGE) { + pos += num_whitespaces - 1; + _add_token(pos); + continue; + } + + // Pattern 8: \s+ (general whitespace) + if (num_whitespaces > 0) { + pos += num_whitespaces; + _add_token(pos); + continue; + } + + // No matches - consume single character + _add_token(++pos); + } + } + + return bpe_offsets; +} + static std::vector unicode_regex_split_custom(const std::string & text, const std::string & regex_expr, const std::vector & offsets) { std::vector bpe_offsets; @@ -562,6 +739,9 @@ static std::vector unicode_regex_split_custom(const std::string & text, regex_expr == "(?:'[sS]|'[tT]|'[rR][eE]|'[vV][eE]|'[mM]|'[lL][lL]|'[dD])|[^\\r\\n\\p{L}\\p{N}]?\\p{L}+|\\p{N}{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+") { bpe_offsets = 
unicode_regex_split_custom_llama3(text, offsets); + } else if (regex_expr == "\\p{Han}+") { + // K2's first pattern - handle all K2 patterns together + bpe_offsets = unicode_regex_split_custom_kimi_k2(text, offsets); } return bpe_offsets; @@ -667,6 +847,38 @@ uint32_t unicode_tolower(uint32_t cpt) { return cpt; // Return the original code point if no lowercase mapping is found } +bool unicode_cpt_is_han(uint32_t cpt) { + // Han character ranges (Chinese/CJK characters) + // CJK Unified Ideographs (most common) + if (cpt >= 0x4E00 && cpt <= 0x9FFF) return true; + + // CJK Extension A + if (cpt >= 0x3400 && cpt <= 0x4DBF) return true; + + // CJK Extension B + if (cpt >= 0x20000 && cpt <= 0x2A6DF) return true; + + // CJK Extension C + if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true; + + // CJK Extension D + if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true; + + // CJK Extension E + if (cpt >= 0x2B820 && cpt <= 0x2CEAF) return true; + + // CJK Extension F + if (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) return true; + + // CJK Compatibility Ideographs + if (cpt >= 0xF900 && cpt <= 0xFAFF) return true; + + // CJK Compatibility Ideographs Supplement + if (cpt >= 0x2F800 && cpt <= 0x2FA1F) return true; + + return false; +} + std::vector unicode_regex_split(const std::string & text, const std::vector & regex_exprs) { // unicode categories static const std::map k_ucat_enum = { diff --git a/src/unicode.h b/src/unicode.h index c27098df7d4be..0a5fa2a78ceff 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -63,4 +63,6 @@ uint8_t unicode_utf8_to_byte(const std::string & utf8); uint32_t unicode_tolower(uint32_t cpt); +bool unicode_cpt_is_han(uint32_t cpt); + std::vector unicode_regex_split(const std::string & text, const std::vector & regex_exprs); diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index db4b2cf65cc43..fc1557a2d4065 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -185,6 +185,8 @@ llama_build_and_test(test-json-partial.cpp) 
llama_build_and_test(test-log.cpp) llama_build_and_test(test-regex-partial.cpp) +llama_build_and_test(test-thread-safety.cpp ARGS -hf ggml-org/models -hff tinyllamas/stories15M-q4_0.gguf -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4) + # this fails on windows (github hosted runner) due to curl DLL not found (exit code 0xc0000135) if (NOT WIN32) llama_build_and_test(test-arg-parser.cpp) diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 509a4b35f57cb..a3d68fba046cf 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -24,10 +24,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -315,8 +317,592 @@ enum test_mode { MODE_TEST, MODE_PERF, MODE_GRAD, + MODE_SUPPORT, }; +// Output format support similar to llama-bench +enum output_formats { CONSOLE, SQL, CSV }; + +static const char * output_format_str(output_formats format) { + switch (format) { + case CONSOLE: + return "console"; + case SQL: + return "sql"; + case CSV: + return "csv"; + default: + GGML_ABORT("invalid output format"); + } +} + +static bool output_format_from_str(const std::string & s, output_formats & format) { + if (s == "console") { + format = CONSOLE; + } else if (s == "sql") { + format = SQL; + } else if (s == "csv") { + format = CSV; + } else { + return false; + } + return true; +} + +// Test result structure for SQL output +struct test_result { + std::string test_time; + std::string build_commit; + std::string backend_name; + std::string op_name; + std::string op_params; + std::string test_mode; + bool supported; + bool passed; + std::string error_message; + double time_us; + double flops; + double bandwidth_gb_s; + size_t memory_kb; + int n_runs; + std::string device_description; + std::string backend_reg_name; + + test_result() { + // Initialize with default values + time_us = 0.0; + flops = 0.0; + bandwidth_gb_s = 0.0; + memory_kb = 0; + n_runs = 0; + supported = 
false; + passed = false; + + // Set test time + time_t t = time(NULL); + char buf[32]; + std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t)); + test_time = buf; + + // Set build info + build_commit = ggml_commit(); + } + + test_result(const std::string & backend_name, const std::string & op_name, const std::string & op_params, + const std::string & test_mode, bool supported, bool passed, const std::string & error_message = "", + double time_us = 0.0, double flops = 0.0, double bandwidth_gb_s = 0.0, size_t memory_kb = 0, + int n_runs = 0, const std::string & device_description = "", const std::string & backend_reg_name = "") : + backend_name(backend_name), + op_name(op_name), + op_params(op_params), + test_mode(test_mode), + supported(supported), + passed(passed), + error_message(error_message), + time_us(time_us), + flops(flops), + bandwidth_gb_s(bandwidth_gb_s), + memory_kb(memory_kb), + n_runs(n_runs), + device_description(device_description), + backend_reg_name(backend_reg_name) { + // Set test time + time_t t = time(NULL); + char buf[32]; + std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t)); + test_time = buf; + + // Set build info + build_commit = ggml_commit(); + } + + static const std::vector & get_fields() { + static const std::vector fields = { + "test_time", "build_commit", "backend_name", "op_name", "op_params", "test_mode", "supported", + "passed", "error_message", "time_us", "flops", "bandwidth_gb_s", "memory_kb", "n_runs", + "device_description", "backend_reg_name" + }; + return fields; + } + + enum field_type { STRING, BOOL, INT, FLOAT }; + + static field_type get_field_type(const std::string & field) { + if (field == "supported" || field == "passed") { + return BOOL; + } + if (field == "memory_kb" || field == "n_runs") { + return INT; + } + if (field == "time_us" || field == "flops" || field == "bandwidth_gb_s") { + return FLOAT; + } + return STRING; + } + + std::vector get_values() const { + return { test_time, + build_commit, + backend_name, 
+ op_name, + op_params, + test_mode, + std::to_string(supported), + std::to_string(passed), + error_message, + std::to_string(time_us), + std::to_string(flops), + std::to_string(bandwidth_gb_s), + std::to_string(memory_kb), + std::to_string(n_runs), + device_description, + backend_reg_name }; + } +}; + +// Printer classes for different output formats +enum class test_status_t { NOT_SUPPORTED, OK, FAIL }; + +struct test_operation_info { + std::string op_name; + std::string op_params; + std::string backend_name; + test_status_t status = test_status_t::OK; + std::string failure_reason; + + // Additional information fields that were previously in separate structs + std::string error_component; + std::string error_details; + + // Gradient info + int64_t gradient_index = -1; + std::string gradient_param_name; + float gradient_value = 0.0f; + + // MAA error info + double maa_error = 0.0; + double maa_threshold = 0.0; + + // Flags for different types of information + bool has_error = false; + bool has_gradient_info = false; + bool has_maa_error = false; + bool is_compare_failure = false; + bool is_large_tensor_skip = false; + + test_operation_info() = default; + + test_operation_info(const std::string & op_name, const std::string & op_params, const std::string & backend_name, + test_status_t status = test_status_t::OK, const std::string & failure_reason = "") : + op_name(op_name), + op_params(op_params), + backend_name(backend_name), + status(status), + failure_reason(failure_reason) {} + + // Set error information + void set_error(const std::string & component, const std::string & details) { + has_error = true; + error_component = component; + error_details = details; + if (status == test_status_t::OK) { + status = test_status_t::FAIL; + } + } + + // Set gradient information + void set_gradient_info(int64_t index, const std::string & param_name, float value) { + has_gradient_info = true; + gradient_index = index; + gradient_param_name = param_name; + gradient_value = 
value; + if (status == test_status_t::OK) { + status = test_status_t::FAIL; + } + } + + // Set MAA error information + void set_maa_error(double error, double threshold) { + has_maa_error = true; + maa_error = error; + maa_threshold = threshold; + if (status == test_status_t::OK) { + status = test_status_t::FAIL; + } + } + + // Set compare failure + void set_compare_failure() { + is_compare_failure = true; + if (status == test_status_t::OK) { + status = test_status_t::FAIL; + } + } + + // Set large tensor skip + void set_large_tensor_skip() { is_large_tensor_skip = true; } +}; + +struct test_summary_info { + size_t tests_passed; + size_t tests_total; + bool is_backend_summary = false; // true for backend summary, false for test summary + + test_summary_info() = default; + + test_summary_info(size_t tests_passed, size_t tests_total, bool is_backend_summary = false) : + tests_passed(tests_passed), + tests_total(tests_total), + is_backend_summary(is_backend_summary) {} +}; + +struct testing_start_info { + size_t device_count; + + testing_start_info() = default; + + testing_start_info(size_t device_count) : device_count(device_count) {} +}; + +struct backend_init_info { + size_t device_index; + size_t total_devices; + std::string device_name; + bool skipped = false; + std::string skip_reason; + std::string description; + size_t memory_total_mb = 0; + size_t memory_free_mb = 0; + bool has_memory_info = false; + + backend_init_info() = default; + + backend_init_info(size_t device_index, size_t total_devices, const std::string & device_name, bool skipped = false, + const std::string & skip_reason = "", const std::string & description = "", + size_t memory_total_mb = 0, size_t memory_free_mb = 0, bool has_memory_info = false) : + device_index(device_index), + total_devices(total_devices), + device_name(device_name), + skipped(skipped), + skip_reason(skip_reason), + description(description), + memory_total_mb(memory_total_mb), + memory_free_mb(memory_free_mb), + 
has_memory_info(has_memory_info) {} +}; + +struct backend_status_info { + std::string backend_name; + test_status_t status; + + backend_status_info() = default; + + backend_status_info(const std::string & backend_name, test_status_t status) : + backend_name(backend_name), + status(status) {} +}; + +struct overall_summary_info { + size_t backends_passed; + size_t backends_total; + bool all_passed; + + overall_summary_info() = default; + + overall_summary_info(size_t backends_passed, size_t backends_total, bool all_passed) : + backends_passed(backends_passed), + backends_total(backends_total), + all_passed(all_passed) {} +}; + +struct printer { + virtual ~printer() {} + + FILE * fout = stdout; + + virtual void print_header() {} + + virtual void print_test_result(const test_result & result) = 0; + + virtual void print_footer() {} + + virtual void print_operation(const test_operation_info & info) { (void) info; } + + virtual void print_summary(const test_summary_info & info) { (void) info; } + + virtual void print_testing_start(const testing_start_info & info) { (void) info; } + + virtual void print_backend_init(const backend_init_info & info) { (void) info; } + + virtual void print_backend_status(const backend_status_info & info) { (void) info; } + + virtual void print_overall_summary(const overall_summary_info & info) { (void) info; } +}; + +struct console_printer : public printer { + void print_test_result(const test_result & result) override { + if (result.test_mode == "test") { + print_test_console(result); + } else if (result.test_mode == "perf") { + print_perf_console(result); + } else if (result.test_mode == "support") { + print_support_console(result); + } + } + + void print_operation(const test_operation_info & info) override { + printf(" %s(%s): ", info.op_name.c_str(), info.op_params.c_str()); + fflush(stdout); + + // Handle large tensor skip first + if (info.is_large_tensor_skip) { + printf("skipping large tensors for speed \n"); + return; + } + + // 
Handle not supported status + if (info.status == test_status_t::NOT_SUPPORTED) { + if (!info.failure_reason.empty()) { + printf("not supported [%s]\n", info.failure_reason.c_str()); + } else { + printf("not supported [%s]\n", info.backend_name.c_str()); + } + return; + } + + // Handle errors and additional information + if (info.has_error) { + if (info.error_component == "allocation") { + fprintf(stderr, "failed to allocate tensors [%s] ", info.backend_name.c_str()); + } else if (info.error_component == "backend") { + fprintf(stderr, " Failed to initialize %s backend\n", info.backend_name.c_str()); + } else { + fprintf(stderr, "Error in %s: %s\n", info.error_component.c_str(), info.error_details.c_str()); + } + } + + // Handle gradient info + if (info.has_gradient_info) { + printf("[%s] nonfinite gradient at index %" PRId64 " (%s=%f) ", info.op_name.c_str(), info.gradient_index, + info.gradient_param_name.c_str(), info.gradient_value); + } + + // Handle MAA error + if (info.has_maa_error) { + printf("[%s] MAA = %.9f > %.9f ", info.op_name.c_str(), info.maa_error, info.maa_threshold); + } + + // Handle compare failure + if (info.is_compare_failure) { + printf("compare failed "); + } + + // Print final status + if (info.status == test_status_t::OK) { + printf("\033[1;32mOK\033[0m\n"); + } else { + printf("\033[1;31mFAIL\033[0m\n"); + } + } + + void print_summary(const test_summary_info & info) override { + if (info.is_backend_summary) { + printf("%zu/%zu backends passed\n", info.tests_passed, info.tests_total); + } else { + printf(" %zu/%zu tests passed\n", info.tests_passed, info.tests_total); + } + } + + void print_backend_status(const backend_status_info & info) override { + printf(" Backend %s: ", info.backend_name.c_str()); + if (info.status == test_status_t::OK) { + printf("\033[1;32mOK\033[0m\n"); + } else { + printf("\033[1;31mFAIL\033[0m\n"); + } + } + + void print_testing_start(const testing_start_info & info) override { + printf("Testing %zu devices\n\n", 
info.device_count); + } + + void print_backend_init(const backend_init_info & info) override { + printf("Backend %zu/%zu: %s\n", info.device_index + 1, info.total_devices, info.device_name.c_str()); + + if (info.skipped) { + printf(" %s\n", info.skip_reason.c_str()); + return; + } + + if (!info.description.empty()) { + printf(" Device description: %s\n", info.description.c_str()); + } + + if (info.has_memory_info) { + printf(" Device memory: %zu MB (%zu MB free)\n", info.memory_total_mb, info.memory_free_mb); + } + + printf("\n"); + } + + void print_overall_summary(const overall_summary_info & info) override { + printf("%zu/%zu backends passed\n", info.backends_passed, info.backends_total); + if (info.all_passed) { + printf("\033[1;32mOK\033[0m\n"); + } else { + printf("\033[1;31mFAIL\033[0m\n"); + } + } + + private: + void print_test_console(const test_result & result) { + printf(" %s(%s): ", result.op_name.c_str(), result.op_params.c_str()); + fflush(stdout); + + if (!result.supported) { + printf("not supported [%s] ", result.backend_name.c_str()); + printf("\n"); + return; + } + + if (result.passed) { + printf("\033[1;32mOK\033[0m\n"); + } else { + printf("\033[1;31mFAIL\033[0m\n"); + } + } + + void print_perf_console(const test_result & result) { + int len = printf(" %s(%s): ", result.op_name.c_str(), result.op_params.c_str()); + fflush(stdout); + + if (!result.supported) { + printf("not supported\n"); + return; + } + + // align while also leaving some margin for variations in parameters + int align = 8; + int last = (len + align - 1) / align * align; + if (last - len < 5) { + last += align; + } + printf("%*s", last - len, ""); + + printf(" %8d runs - %8.2f us/run - ", result.n_runs, result.time_us); + + if (result.flops > 0) { + auto format_flops = [](double flops) -> std::string { + char buf[256]; + if (flops >= 1e12) { + snprintf(buf, sizeof(buf), "%6.2f TFLOP", flops / 1e12); + } else if (flops >= 1e9) { + snprintf(buf, sizeof(buf), "%6.2f GFLOP", flops / 
1e9); + } else if (flops >= 1e6) { + snprintf(buf, sizeof(buf), "%6.2f MFLOP", flops / 1e6); + } else { + snprintf(buf, sizeof(buf), "%6.2f kFLOP", flops / 1e3); + } + return buf; + }; + uint64_t op_flops_per_run = result.flops * result.time_us / 1e6; + printf("%s/run - \033[1;34m%sS\033[0m", format_flops(op_flops_per_run).c_str(), + format_flops(result.flops).c_str()); + } else { + printf("%8zu kB/run - \033[1;34m%7.2f GB/s\033[0m", result.memory_kb, result.bandwidth_gb_s); + } + printf("\n"); + } + + void print_support_console(const test_result & result) { + printf(" %s(%s): ", result.op_name.c_str(), result.op_params.c_str()); + fflush(stdout); + + if (result.supported) { + printf("\033[1;32mSUPPORTED\033[0m\n"); + } else { + printf("\033[1;31mNOT SUPPORTED\033[0m\n"); + } + } +}; + +struct sql_printer : public printer { + static std::string get_sql_field_type(const std::string & field) { + switch (test_result::get_field_type(field)) { + case test_result::STRING: + return "TEXT"; + case test_result::BOOL: + case test_result::INT: + return "INTEGER"; + case test_result::FLOAT: + return "REAL"; + default: + GGML_ABORT("invalid field type"); + } + } + + void print_header() override { + std::vector fields = test_result::get_fields(); + fprintf(fout, "CREATE TABLE IF NOT EXISTS test_backend_ops (\n"); + for (size_t i = 0; i < fields.size(); i++) { + fprintf(fout, " %s %s%s\n", fields[i].c_str(), get_sql_field_type(fields[i]).c_str(), + i < fields.size() - 1 ? "," : ""); + } + fprintf(fout, ");\n\n"); + } + + void print_test_result(const test_result & result) override { + fprintf(fout, "INSERT INTO test_backend_ops ("); + std::vector fields = test_result::get_fields(); + for (size_t i = 0; i < fields.size(); i++) { + fprintf(fout, "%s%s", fields[i].c_str(), i < fields.size() - 1 ? 
", " : ""); + } + fprintf(fout, ") VALUES ("); + std::vector values = result.get_values(); + for (size_t i = 0; i < values.size(); i++) { + fprintf(fout, "'%s'%s", values[i].c_str(), i < values.size() - 1 ? ", " : ""); + } + fprintf(fout, ");\n"); + } +}; + +struct csv_printer : public printer { + void print_header() override { + std::vector fields = test_result::get_fields(); + for (size_t i = 0; i < fields.size(); i++) { + printf("\"%s\"%s", fields[i].c_str(), i < fields.size() - 1 ? "," : ""); + } + printf("\n"); + } + + void print_test_result(const test_result & result) override { + std::vector values = result.get_values(); + for (size_t i = 0; i < values.size(); i++) { + // Escape quotes and wrap in quotes for CSV + std::string escaped_value = values[i]; + size_t pos = 0; + while ((pos = escaped_value.find("\"", pos)) != std::string::npos) { + escaped_value.replace(pos, 1, "\"\""); + pos += 2; + } + printf("\"%s\"%s", escaped_value.c_str(), i < values.size() - 1 ? "," : ""); + } + printf("\n"); + } +}; + +static std::unique_ptr create_printer(output_formats format) { + switch (format) { + case CONSOLE: + return std::make_unique(); + case SQL: + return std::make_unique(); + case CSV: + return std::make_unique(); + } + GGML_ABORT("invalid output format"); +} + struct test_case { virtual ~test_case() {} @@ -382,6 +968,8 @@ struct test_case { return 0; } + virtual bool run_whole_graph() { return false; } + ggml_cgraph * gf = nullptr; ggml_cgraph * gb = nullptr; @@ -392,7 +980,7 @@ struct test_case { std::vector sentinels; void add_sentinel(ggml_context * ctx) { - if (mode == MODE_PERF || mode == MODE_GRAD) { + if (mode == MODE_PERF || mode == MODE_GRAD || mode == MODE_SUPPORT) { return; } ggml_tensor * sentinel = ::ggml_new_tensor_1d(ctx, GGML_TYPE_F32, sentinel_size); @@ -432,7 +1020,7 @@ struct test_case { return t; } - bool eval(ggml_backend_t backend1, ggml_backend_t backend2, const char * op_name) { + bool eval(ggml_backend_t backend1, ggml_backend_t 
backend2, const char * op_name, printer * output_printer) { mode = MODE_TEST; ggml_init_params params = { @@ -449,29 +1037,33 @@ struct test_case { add_sentinel(ctx); ggml_tensor * out = build_graph(ctx); - - if (op_name != nullptr && op_desc(out) != op_name) { + std::string current_op_name = op_desc(out); + if (op_name != nullptr && current_op_name != op_name) { //printf(" %s: skipping\n", op_desc(out).c_str()); ggml_free(ctx); return true; } - printf(" %s(%s): ", op_desc(out).c_str(), vars().c_str()); - fflush(stdout); - // check if the backends support the ops bool supported = true; for (ggml_backend_t backend : {backend1, backend2}) { for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { if (!ggml_backend_supports_op(backend, t)) { - printf("not supported [%s] ", ggml_backend_name(backend)); supported = false; break; } } } + if (!supported) { - printf("\n"); + // Create test result for unsupported operation + test_result result(ggml_backend_name(backend1), current_op_name, vars(), "test", + false, false, "not supported"); + + if (output_printer) { + output_printer->print_test_result(result); + } + ggml_free(ctx); return true; } @@ -574,26 +1166,26 @@ struct test_case { GGML_UNUSED(index); }; - const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud); - - if (!cmp_ok) { - printf("compare failed "); - } + const bool cmp_ok = ggml_backend_compare_graph_backend(backend1, backend2, gf, callback, &ud, run_whole_graph() ? out : nullptr); ggml_backend_buffer_free(buf); ggml_free(ctx); - if (ud.ok && cmp_ok) { - printf("\033[1;32mOK\033[0m\n"); - return true; + // Create test result + bool test_passed = ud.ok && cmp_ok; + std::string error_msg = test_passed ? "" : (!cmp_ok ? 
"compare failed" : "test failed"); + test_result result(ggml_backend_name(backend1), current_op_name, vars(), "test", supported, test_passed, + error_msg); + + if (output_printer) { + output_printer->print_test_result(result); } - printf("\033[1;31mFAIL\033[0m\n"); - return false; + return test_passed; } - bool eval_perf(ggml_backend_t backend, const char * op_name) { + bool eval_perf(ggml_backend_t backend, const char * op_name, printer * output_printer) { mode = MODE_PERF; static const size_t graph_nodes = 8192; @@ -606,29 +1198,22 @@ struct test_case { ggml_context_ptr ctx(ggml_init(params)); // smart ptr GGML_ASSERT(ctx); - ggml_tensor * out = build_graph(ctx.get()); - - if (op_name != nullptr && op_desc(out) != op_name) { + ggml_tensor * out = build_graph(ctx.get()); + std::string current_op_name = op_desc(out); + if (op_name != nullptr && current_op_name != op_name) { //printf(" %s: skipping\n", op_desc(out).c_str()); return true; } - int len = printf(" %s(%s): ", op_desc(out).c_str(), vars().c_str()); - fflush(stdout); - - // check if backends support op if (!ggml_backend_supports_op(backend, out)) { - printf("not supported\n"); - return true; - } + // Create test result for unsupported performance test + test_result result(ggml_backend_name(backend), current_op_name, vars(), "perf", false, false, + "not supported"); - // align while also leaving some margin for variations in parameters - int align = 8; - int last = (len + align - 1) / align * align; - if (last - len < 5) { - last += align; + output_printer->print_test_result(result); + + return true; } - printf("%*s", last - len, ""); // allocate ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr @@ -713,40 +1298,56 @@ struct test_case { total_runs += n_runs; } while (total_time_us < 1000*1000); // run for at least 1 second - printf(" %8d runs - %8.2f us/run - ", - total_runs, - (double)total_time_us / total_runs); + // Create test result + double avg_time_us = 
(double) total_time_us / total_runs; + double calculated_flops = (op_flops(out) > 0) ? (op_flops(out) * total_runs) / (total_time_us / 1e6) : 0.0; + double calculated_bandwidth = + (op_flops(out) == 0) ? total_mem / (total_time_us / 1e6) / 1024.0 / 1024.0 / 1024.0 : 0.0; + size_t calculated_memory_kb = op_size(out) / 1024; - if (op_flops(out) > 0) { - double flops_per_sec = (op_flops(out) * total_runs) / (total_time_us / 1e6); - auto format_flops = [](double flops) -> std::string { - char buf[256]; - if (flops >= 1e12) { - snprintf(buf, sizeof(buf), "%6.2f TFLOP", flops / 1e12); - } else if (flops >= 1e9) { - snprintf(buf, sizeof(buf), "%6.2f GFLOP", flops / 1e9); - } else if (flops >= 1e6) { - snprintf(buf, sizeof(buf), "%6.2f MFLOP", flops / 1e6); - } else { - snprintf(buf, sizeof(buf), "%6.2f KFLOP", flops / 1e3); - } - return buf; - }; - printf("%s/run - \033[1;34m%sS\033[0m", - format_flops(op_flops(out)).c_str(), - format_flops(flops_per_sec).c_str()); + test_result result(ggml_backend_name(backend), current_op_name, vars(), "perf", true, true, "", avg_time_us, + calculated_flops, calculated_bandwidth, calculated_memory_kb, total_runs); - } else { - printf("%8zu kB/run - \033[1;34m%7.2f GB/s\033[0m", - op_size(out) / 1024, - total_mem / (total_time_us / 1e6) / 1024.0 / 1024.0 / 1024.0); + if (output_printer) { + output_printer->print_test_result(result); } - printf("\n"); return true; } - bool eval_grad(ggml_backend_t backend, const char * op_name) { + bool eval_support(ggml_backend_t backend, const char * op_name, printer * output_printer) { + mode = MODE_SUPPORT; + + static const size_t graph_nodes = 8192; + + ggml_init_params params = { + /* .mem_size = */ ggml_tensor_overhead()*128 + ggml_graph_overhead_custom(graph_nodes, false), + /* .mem_base = */ NULL, + /* .no_alloc = */ true, + }; + ggml_context_ptr ctx(ggml_init(params)); // smart ptr + GGML_ASSERT(ctx); + + ggml_tensor * out = build_graph(ctx.get()); + std::string current_op_name = op_desc(out); + 
if (op_name != nullptr && current_op_name != op_name) { + return true; + } + + bool supported = ggml_backend_supports_op(backend, out); + + std::string device_desc = ggml_backend_dev_description(ggml_backend_get_device(backend)); + std::string backend_reg_name = ggml_backend_reg_name(ggml_backend_dev_backend_reg(ggml_backend_get_device(backend))); + + test_result result(ggml_backend_name(backend), current_op_name, vars(), "support", supported, supported, + supported ? "yes" : "no", 0.0, 0.0, 0.0, 0, 0, device_desc, backend_reg_name); + + output_printer->print_test_result(result); + + return true; + } + + bool eval_grad(ggml_backend_t backend, const char * op_name, printer * output_printer) { mode = MODE_GRAD; const std::vector expect = grad_expect(); @@ -764,42 +1365,47 @@ struct test_case { ggml_tensor * out = build_graph(ctx.get()); if ((op_name != nullptr && op_desc(out) != op_name) || out->op == GGML_OP_OPT_STEP_ADAMW) { - //printf(" %s: skipping\n", op_desc(out).c_str()); return true; } - printf(" %s(%s): ", op_desc(out).c_str(), vars().c_str()); - fflush(stdout); - if (out->type != GGML_TYPE_F32) { - printf("not supported [%s->type != FP32]\n", out->name); + output_printer->print_operation(test_operation_info(op_desc(out), vars(), ggml_backend_name(backend), + test_status_t::NOT_SUPPORTED, + out->name + std::string("->type != FP32"))); return true; } + // Print operation info first + output_printer->print_operation(test_operation_info(op_desc(out), vars(), ggml_backend_name(backend))); + // check if the backend supports the ops - bool supported = true; - bool any_params = false; + bool supported = true; + bool any_params = false; + std::string failure_reason; + for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) { if (!ggml_backend_supports_op(backend, t)) { - printf("not supported [%s] ", ggml_backend_name(backend)); - supported = false; + supported = false; + failure_reason = 
ggml_backend_name(backend); break; } if ((t->flags & GGML_TENSOR_FLAG_PARAM)) { any_params = true; if (t->type != GGML_TYPE_F32) { - printf("not supported [%s->type != FP32] ", t->name); - supported = false; + supported = false; + failure_reason = std::string(t->name) + "->type != FP32"; break; } } } if (!any_params) { - printf("not supported [%s] \n", op_desc(out).c_str()); - supported = false; + supported = false; + failure_reason = op_desc(out); } + if (!supported) { - printf("\n"); + output_printer->print_operation(test_operation_info(op_desc(out), vars(), ggml_backend_name(backend), + test_status_t::NOT_SUPPORTED, failure_reason)); return true; } @@ -810,7 +1416,9 @@ struct test_case { } } if (ngrads > grad_nmax()) { - printf("skipping large tensors for speed \n"); + test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend)); + info.set_large_tensor_skip(); + output_printer->print_operation(info); return true; } @@ -833,25 +1441,30 @@ struct test_case { for (ggml_tensor * t = ggml_get_first_tensor(ctx.get()); t != NULL; t = ggml_get_next_tensor(ctx.get(), t)) { if (!ggml_backend_supports_op(backend, t)) { - printf("not supported [%s] ", ggml_backend_name(backend)); + output_printer->print_operation(test_operation_info(op_desc(out), vars(), ggml_backend_name(backend), + test_status_t::NOT_SUPPORTED, + ggml_backend_name(backend))); supported = false; break; } if ((t->flags & GGML_TENSOR_FLAG_PARAM) && t->type != GGML_TYPE_F32) { - printf("not supported [%s->type != FP32] ", t->name); + output_printer->print_operation(test_operation_info(op_desc(out), vars(), ggml_backend_name(backend), + test_status_t::NOT_SUPPORTED, + std::string(t->name) + "->type != FP32")); supported = false; break; } } if (!supported) { - printf("\n"); return true; } // allocate ggml_backend_buffer_ptr buf(ggml_backend_alloc_ctx_tensors(ctx.get(), backend)); // smart ptr if (buf == NULL) { - printf("failed to allocate tensors [%s] ", ggml_backend_name(backend)); + 
test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend)); + info.set_error("allocation", ""); + output_printer->print_operation(info); return false; } @@ -889,7 +1502,9 @@ struct test_case { for (int64_t i = 0; i < ne; ++i) { // gradient algebraic // check for nans if (!std::isfinite(ga[i])) { - printf("[%s] nonfinite gradient at index %" PRId64 " (%s=%f) ", ggml_op_desc(t), i, bn, ga[i]); + test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend)); + info.set_gradient_info(i, bn, ga[i]); + output_printer->print_operation(info); ok = false; break; } @@ -957,7 +1572,9 @@ struct test_case { const double err = mean_abs_asymm(gn.data(), ga.data(), gn.size(), expect); if (err > max_maa_err()) { - printf("[%s] MAA = %.9f > %.9f ", ggml_op_desc(t), err, max_maa_err()); + test_operation_info info(op_desc(out), vars(), ggml_backend_name(backend)); + info.set_maa_error(err, max_maa_err()); + output_printer->print_operation(info); ok = false; break; } @@ -966,16 +1583,18 @@ struct test_case { } } + // Create final test result + test_operation_info final_info(op_desc(out), vars(), ggml_backend_name(backend)); if (!ok) { - printf("compare failed "); + final_info.set_compare_failure(); } + final_info.status = ok ? test_status_t::OK : test_status_t::FAIL; + output_printer->print_operation(final_info); if (ok) { - printf("\033[1;32mOK\033[0m\n"); return true; } - printf("\033[1;31mFAIL\033[0m\n"); return false; } }; @@ -1026,53 +1645,176 @@ struct test_example : public test_case { // Step 3: return the output tensor. return out; } - // In order to also check the gradients for your op, add calls like ggml_set_param(a) - // immediately after you create the tensors. - // This is optional and only makes sense if a backward pass has actually been implemented for the new op. -}; + // In order to also check the gradients for your op, add calls like ggml_set_param(a) + // immediately after you create the tensors. 
+ // This is optional and only makes sense if a backward pass has actually been implemented for the new op. +}; + + +// GGML_OP_UNARY +struct test_unary : public test_case { + const ggml_unary_op op; + const ggml_type type; + const std::array ne_a; + int v; // view (1 : non-contiguous a) + + std::string vars() override { + return VARS_TO_STR3(type, ne_a, v); + } + + test_unary(ggml_unary_op op, + ggml_type type = GGML_TYPE_F32, + std::array ne_a = {128, 2, 2, 2}, + int v = 0) + : op(op), type(type), ne_a(ne_a), v(v) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + const bool grad_supported = op == GGML_UNARY_OP_ABS || op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_NEG || + op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU; + + ggml_tensor * a; + if (v & 1) { + auto ne = ne_a; ne[0] *= 3; + a = ggml_new_tensor(ctx, type, 4, ne.data()); + if (grad_supported) { + ggml_set_param(a); + } + ggml_set_name(a, "a"); + + a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0); + ggml_set_name(a, "view_of_a"); + } else { + a = ggml_new_tensor(ctx, type, 4, ne_a.data()); + if (grad_supported) { + ggml_set_param(a); + } + ggml_set_name(a, "a"); + } + + ggml_tensor * out = ggml_unary(ctx, a, op); + ggml_set_name(out, "out"); + + return out; + } + + void initialize_tensors(ggml_context * ctx) override { + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + // test extended range of values to check for NaNs in GELU + init_tensor_uniform(t, -150.f, 150.f); + } + } + + float grad_eps() override { + return 15.0f; + } + + std::vector grad_expect() override { + if (op == GGML_UNARY_OP_ABS) { + return {-1.0f, 1.0f}; + } + if (op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_STEP) { + return {0.0f}; + } + if (op == GGML_UNARY_OP_RELU) { + return {0.0f, 1.0f}; + } + return {}; + } + +}; + +// GGML_OP_GLU +struct test_glu : public test_case { + const 
ggml_glu_op op; + const ggml_type type; + const std::array ne_a; + int v; // view (1 : non-contiguous a) + bool swapped; + + std::string vars() override { + return VARS_TO_STR4(type, ne_a, v, swapped); + } + + test_glu(ggml_glu_op op, + ggml_type type = GGML_TYPE_F32, + std::array ne_a = {128, 2, 2, 2}, + int v = 0, + bool swapped = false) + : op(op), type(type), ne_a(ne_a), v(v), swapped(swapped) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * a; + if (v & 1) { + auto ne = ne_a; ne[0] *= 3; + a = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_name(a, "a"); + + a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0); + ggml_set_name(a, "view_of_a"); + } else { + a = ggml_new_tensor(ctx, type, 4, ne_a.data()); + ggml_set_name(a, "a"); + } + + ggml_tensor * out = ggml_glu(ctx, a, op, swapped); + ggml_set_name(out, "out"); + + return out; + } + void initialize_tensors(ggml_context * ctx) override { + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + // test extended range of values to check for NaNs in GELU + init_tensor_uniform(t, -150.f, 150.f); + } + } +}; -// GGML_OP_UNARY -struct test_unary : public test_case { - const ggml_unary_op op; +struct test_glu_split : public test_case { + const ggml_glu_op op; const ggml_type type; const std::array ne_a; int v; // view (1 : non-contiguous a) std::string vars() override { - return VARS_TO_STR3(type, ne_a, v); + return VARS_TO_STR3(type, ne_a, v) + ",split"; } - test_unary(ggml_unary_op op, + test_glu_split(ggml_glu_op op, ggml_type type = GGML_TYPE_F32, std::array ne_a = {128, 2, 2, 2}, int v = 0) : op(op), type(type), ne_a(ne_a), v(v) {} ggml_tensor * build_graph(ggml_context * ctx) override { - const bool grad_supported = op == GGML_UNARY_OP_ABS || op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_NEG || - op == GGML_UNARY_OP_STEP || op == GGML_UNARY_OP_RELU || op == GGML_UNARY_OP_SILU; - 
ggml_tensor * a; + ggml_tensor * b; if (v & 1) { auto ne = ne_a; ne[0] *= 3; a = ggml_new_tensor(ctx, type, 4, ne.data()); - if (grad_supported) { - ggml_set_param(a); - } + ggml_set_param(a); ggml_set_name(a, "a"); a = ggml_view_4d(ctx, a, ne_a[0], ne_a[1], ne_a[2], ne_a[3], a->nb[1], a->nb[2], a->nb[3], 0); ggml_set_name(a, "view_of_a"); + + b = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_param(b); + ggml_set_name(b, "b"); + + b = ggml_view_4d(ctx, b, ne_a[0], ne_a[1], ne_a[2], ne_a[3], b->nb[1], b->nb[2], b->nb[3], 0); + ggml_set_name(a, "view_of_b"); } else { a = ggml_new_tensor(ctx, type, 4, ne_a.data()); - if (grad_supported) { - ggml_set_param(a); - } + ggml_set_param(a); ggml_set_name(a, "a"); + + b = ggml_new_tensor(ctx, type, 4, ne_a.data()); + ggml_set_param(b); + ggml_set_name(b, "b"); } - ggml_tensor * out = ggml_unary(ctx, a, op); + ggml_tensor * out = ggml_glu_split(ctx, a, b, op); ggml_set_name(out, "out"); return out; @@ -1084,24 +1826,6 @@ struct test_unary : public test_case { init_tensor_uniform(t, -150.f, 150.f); } } - - float grad_eps() override { - return 15.0f; - } - - std::vector grad_expect() override { - if (op == GGML_UNARY_OP_ABS) { - return {-1.0f, 1.0f}; - } - if (op == GGML_UNARY_OP_SGN || op == GGML_UNARY_OP_STEP) { - return {0.0f}; - } - if (op == GGML_UNARY_OP_RELU) { - return {0.0f, 1.0f}; - } - return {}; - } - }; // GGML_OP_GET_ROWS @@ -1213,6 +1937,76 @@ struct test_get_rows_back : public test_case { } }; +// GGML_OP_SET_ROWS +struct test_set_rows : public test_case { + const ggml_type type; + const std::array ne; + const std::array nr23; // broadcast only dims 2 and 3 + const int r; // rows to set + const bool v; // view (non-contiguous src1) + + std::string vars() override { + return VARS_TO_STR5(type, ne, nr23, r, v); + } + + test_set_rows(ggml_type type, + std::array ne, + std::array nr23, + int r, bool v = false) + : type(type), ne(ne), nr23(nr23), r(r), v(v) {} + + ggml_tensor * build_graph(ggml_context * ctx) 
override { + ggml_tensor * dst = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2]*nr23[0], ne[3]*nr23[1]); + ggml_set_name(dst, "dst"); + + ggml_tensor * src = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, ne[0], r, ne[2]*nr23[0], ne[3]*nr23[1]); + ggml_set_name(src, "src"); + + ggml_tensor * row_idxs = ggml_new_tensor_3d(ctx, GGML_TYPE_I64, r, ne[2], ne[3]); + ggml_set_name(row_idxs, "row_idxs"); + + if (v) { + src = ggml_view_4d(ctx, src, ne[0], r/2, ne[2]*nr23[0], ne[3]*nr23[1], src->nb[1], src->nb[2], src->nb[3], 0); + row_idxs = ggml_view_3d(ctx, row_idxs, r/2, ne[2], ne[3], row_idxs->nb[1], row_idxs->nb[2], 0); + ggml_set_name(row_idxs, "view_of_rows"); + } + + ggml_tensor * out = ggml_set_rows(ctx, dst, src, row_idxs); + ggml_set_name(out, "out"); + + return out; + } + + void initialize_tensors(ggml_context * ctx) override { + std::random_device rd; + std::default_random_engine rng(rd()); + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + if (t->type == GGML_TYPE_I64) { + if (ggml_is_view_op(t->op)) { + continue; + } + + for (int i2 = 0; i2 < t->ne[2]; i2++) { + for (int i1 = 0; i1 < t->ne[1]; i1++) { + // generate a shuffled subset of row indices + std::vector data(ne[1]); + for (int i = 0; i < ne[1]; i++) { + data[i] = i; + } + std::shuffle(data.begin(), data.end(), rng); + data.resize(t->ne[0]); + + const size_t offs = i1*t->nb[1] + i2*t->nb[2]; + ggml_backend_tensor_set(t, data.data(), offs, t->ne[0]*sizeof(int64_t)); + } + } + } else { + init_tensor_uniform(t); + } + } + } +}; + // GGML_OP_ARGMAX struct test_argmax : public test_case { const ggml_type type; @@ -1655,22 +2449,24 @@ struct test_scale : public test_case { const ggml_type type; const std::array ne; float scale; + float bias; std::string vars() override { - return VARS_TO_STR3(type, ne, scale); + return VARS_TO_STR4(type, ne, scale, bias); } test_scale(ggml_type type = GGML_TYPE_F32, std::array ne = {10, 10, 10, 10}, - float scale = 2.0f) - : 
type(type), ne(ne), scale(scale) {} + float scale = 2.0f, + float bias = 0.0f) + : type(type), ne(ne), scale(scale), bias(bias) {} ggml_tensor * build_graph(ggml_context * ctx) override { ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); ggml_set_param(a); ggml_set_name(a, "a"); - ggml_tensor * out = ggml_scale(ctx, a, scale); + ggml_tensor * out = ggml_scale_bias(ctx, a, scale, bias); ggml_set_name(out, "out"); return out; @@ -1826,6 +2622,59 @@ struct test_rms_norm_back : public test_case { } }; +// GGML_OP_RMS_NORM + GGML_OP_MUL +struct test_rms_norm_mul : public test_case { + const ggml_type type; + const std::array ne; + const float eps; + + std::string op_desc(ggml_tensor * t) override { + GGML_UNUSED(t); + return "RMS_NORM_MUL"; + } + + bool run_whole_graph() override { return true; } + + std::string vars() override { + return VARS_TO_STR3(type, ne, eps); + } + + test_rms_norm_mul(ggml_type type = GGML_TYPE_F32, + std::array ne = {64, 5, 4, 3}, + float eps = 1e-6f) + : type(type), ne(ne), eps(eps) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_set_param(a); + ggml_set_name(a, "a"); + ggml_set_param(b); + ggml_set_name(b, "b"); + + // Use a and b early, so we don't end up with an OP_NONE between rms_norm and mul + a = ggml_add(ctx, a, b); + ggml_tensor * out = ggml_mul(ctx, ggml_rms_norm(ctx, a, eps), b); + ggml_set_name(out, "out"); + + return out; + } + + void initialize_tensors(ggml_context * ctx) override { + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + init_tensor_uniform(t, -10.f, 10.f); + } + } + + float grad_eps() override { + return 1.0f; + } + + bool grad_precise() override { + return true; + } +}; + // GGML_OP_SSM_CONV struct test_ssm_conv : public test_case { const ggml_type type; @@ -1854,28 +2703,58 @@ struct test_ssm_scan : public 
test_case { const ggml_type type; const int64_t d_state; - const int64_t d_inner; + const int64_t head_dim; + const int64_t n_head; + const int64_t n_group; const int64_t n_seq_tokens; const int64_t n_seqs; std::string vars() override { - return VARS_TO_STR5(type, d_state, d_inner, n_seq_tokens, n_seqs); + return VARS_TO_STR7(type, d_state, head_dim, n_head, n_group, n_seq_tokens, n_seqs); } test_ssm_scan(ggml_type type = GGML_TYPE_F32, - int64_t d_state = 32, int64_t d_inner = 32, int64_t n_seq_tokens = 32, int64_t n_seqs = 32) - : type(type), d_state(d_state), d_inner(d_inner), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {} + int64_t d_state = 32, + int64_t head_dim = 1, // non-zero for Mamba-2 + int64_t n_head = 32, + int64_t n_group = 1, + int64_t n_seq_tokens = 32, + int64_t n_seqs = 32) + : type(type), d_state(d_state), head_dim(head_dim), n_head(n_head), n_group(n_group), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {} ggml_tensor * build_graph(ggml_context * ctx) override { - ggml_tensor * s = ggml_new_tensor(ctx, type, 4, std::vector{ d_state, d_inner, n_seqs, 1 }.data()); - ggml_tensor * x = ggml_new_tensor(ctx, type, 4, std::vector{ d_inner, n_seq_tokens, n_seqs, 1 }.data()); - ggml_tensor * dt = ggml_new_tensor(ctx, type, 4, std::vector{ d_inner, n_seq_tokens, n_seqs, 1 }.data()); - ggml_tensor * A = ggml_new_tensor(ctx, type, 4, std::vector{ d_state, d_inner, 1 , 1 }.data()); - ggml_tensor * B = ggml_new_tensor(ctx, type, 4, std::vector{ d_state, n_seq_tokens, n_seqs, 1 }.data()); - ggml_tensor * C = ggml_new_tensor(ctx, type, 4, std::vector{ d_state, n_seq_tokens, n_seqs, 1 }.data()); - ggml_tensor * out = ggml_ssm_scan(ctx, s, x, dt, A, B, C); + ggml_tensor * s = ggml_new_tensor_4d(ctx, type, d_state, head_dim, n_head, n_seqs); + ggml_tensor * x = ggml_new_tensor_4d(ctx, type, head_dim, n_head, n_seq_tokens, n_seqs); + ggml_tensor * dt = ggml_new_tensor_3d(ctx, type, n_head, n_seq_tokens, n_seqs); + ggml_tensor * A = ggml_new_tensor_2d(ctx, type, 
(head_dim > 1) ? 1 : d_state, n_head); + ggml_tensor * B = ggml_new_tensor_4d(ctx, type, d_state, n_group, n_seq_tokens, n_seqs); + ggml_tensor * C = ggml_new_tensor_4d(ctx, type, d_state, n_group, n_seq_tokens, n_seqs); + ggml_tensor * ids = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_seqs); + ggml_tensor * out = ggml_ssm_scan(ctx, s, x, dt, A, B, C, ids); return out; } + + // similar to test_mul_mat_id + void initialize_tensors(ggml_context * ctx) override { + std::random_device rd; + std::default_random_engine rng(rd()); + for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) { + if (t->type == GGML_TYPE_I32) { + if (ggml_is_view_op(t->op)) { continue; } + // ids + for (int64_t r = 0; r < ggml_nrows(t); r++) { + std::vector data(t->ne[0]); + for (int i = 0; i < t->ne[0]; i++) { + data[i] = i; + } + std::shuffle(data.begin(), data.end(), rng); + ggml_backend_tensor_set(t, data.data(), r * t->nb[1], t->ne[0] * sizeof(int32_t)); + } + } else { + init_tensor_uniform(t); + } + } + } }; // GGML_OP_RWKV_WKV6 @@ -2455,11 +3334,12 @@ struct test_soft_max : public test_case { const std::array ne; const bool mask; const ggml_type m_prec; + const std::array nr23; // broadcast only dims 2 and 3 const float scale; const float max_bias; std::string vars() override { - return VARS_TO_STR6(type, ne, mask, m_prec, scale, max_bias); + return VARS_TO_STR7(type, ne, mask, m_prec, nr23, scale, max_bias); } // the 1024 test with bias occasionally fails: @@ -2472,18 +3352,19 @@ struct test_soft_max : public test_case { std::array ne = {10, 5, 4, 3}, bool mask = false, ggml_type m_prec = GGML_TYPE_F32, + std::array nr23 = {1, 1}, float scale = 1.0f, float max_bias = 0.0f) - : type(type), ne(ne), mask(mask), m_prec(m_prec), scale(scale), max_bias(max_bias) {} + : type(type), ne(ne), mask(mask), m_prec(m_prec), nr23(nr23), scale(scale), max_bias(max_bias) {} ggml_tensor * build_graph(ggml_context * ctx) override { - ggml_tensor * a = 
ggml_new_tensor(ctx, type, 4, ne.data()); + ggml_tensor * a = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2]*nr23[0], ne[3]*nr23[1]); ggml_set_param(a); ggml_set_name(a, "a"); ggml_tensor * mask = nullptr; if (this->mask) { - mask = ggml_new_tensor_2d(ctx, m_prec, ne[0], ne[1]); + mask = ggml_new_tensor_4d(ctx, m_prec, ne[0], ne[1], ne[2], ne[3]); ggml_set_name(mask, "mask"); } @@ -2725,6 +3606,35 @@ struct test_conv_transpose_1d : public test_case { } }; +// GGML_OP_CONV_TRANSPOSE_2D +struct test_conv_transpose_2d : public test_case { + const std::array ne_input; + const std::array ne_kernel; + const int stride; + + std::string vars() override { + return VARS_TO_STR3(ne_input, ne_kernel, stride); + } + + test_conv_transpose_2d(std::array ne_input = {10, 10, 3, 1}, // [input_width, input_height, input_channels, 1] + std::array ne_kernel = {3, 3, 3, 1}, // [kernel_width, kernel_height, input_channels, 1] + int stride = 1) + : ne_input(ne_input), ne_kernel(ne_kernel), stride(stride){} + + ggml_tensor * build_graph(ggml_context * ctx) override { + ggml_tensor * input = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne_input.data()); + ggml_set_name(input, "input"); + + ggml_tensor * kernel = ggml_new_tensor(ctx, GGML_TYPE_F16, 4, ne_kernel.data()); + ggml_set_name(kernel, "kernel"); + + ggml_tensor * out = ggml_conv_transpose_2d_p0(ctx, kernel, input, stride); + ggml_set_name(out, "out"); + + return out; + } +}; + // GGML_OP_IM2COL struct test_im2col : public test_case { const ggml_type type_input; @@ -3037,28 +3947,28 @@ struct test_upscale : public test_case { } }; -// GGML_OP_UPSCALE (ext) -struct test_upscale_ext : public test_case { +// GGML_OP_UPSCALE (via ggml_interpolate) +struct test_interpolate : public test_case { const ggml_type type; const std::array ne; const std::array ne_tgt; - const ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST; + const uint32_t mode = GGML_SCALE_MODE_NEAREST; std::string vars() override { return VARS_TO_STR4(type, ne, ne_tgt, mode); } - 
test_upscale_ext(ggml_type type = GGML_TYPE_F32, + test_interpolate(ggml_type type = GGML_TYPE_F32, std::array ne = {2, 5, 7, 11}, std::array ne_tgt = {5, 7, 11, 13}, - ggml_scale_mode mode = GGML_SCALE_MODE_NEAREST) + uint32_t mode = GGML_SCALE_MODE_NEAREST) : type(type), ne(ne), ne_tgt(ne_tgt), mode(mode) {} ggml_tensor * build_graph(ggml_context * ctx) override { ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data()); ggml_set_name(a, "a"); - ggml_tensor * out = ggml_upscale_ext(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode); + ggml_tensor * out = ggml_interpolate(ctx, a, ne_tgt[0], ne_tgt[1],ne_tgt[2], ne_tgt[3], mode); ggml_set_name(out, "out"); return out; @@ -3204,6 +4114,32 @@ struct test_pad_reflect_1d : public test_case { } }; +// GGML_OP_ROLL +struct test_roll : public test_case { + const int shift0; + const int shift1; + const int shift3; + const int shift4; + + std::string vars() override { + return VARS_TO_STR4(shift0, shift1, shift3, shift4); + } + + test_roll(int shift0 = 3, int shift1 = -2, int shift3 = 1, int shift4 = -1) + : shift0(shift0), shift1(shift1), shift3(shift3), shift4(shift4) {} + + ggml_tensor * build_graph(ggml_context * ctx) override { + int64_t ne[4] = {10, 5, 4, 3}; + ggml_tensor * a = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); + ggml_set_name(a, "a"); + + ggml_tensor * out = ggml_roll(ctx, a, shift0, shift1, shift3, shift4); + ggml_set_name(out, "out"); + + return out; + } +}; + // GGML_OP_ARANGE struct test_arange : public test_case { const ggml_type type; @@ -3285,7 +4221,7 @@ struct test_flash_attn_ext : public test_case { const int64_t hsk; // K head size const int64_t hsv; // V head size const int64_t nh; // num heads - const int64_t nr; // repeat in Q, tests for grouped-query attention + const std::array nr23; // repeat in dim 2 and 3, tests for grouped-query attention const int64_t kv; // kv size const int64_t nb; // batch size @@ -3299,7 +4235,7 @@ struct test_flash_attn_ext : public test_case { std::array 
permute; std::string vars() override { - return VARS_TO_STR12(hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, permute); + return VARS_TO_STR12(hsk, hsv, nh, nr23, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, permute); } double max_nmse_err() override { @@ -3310,13 +4246,13 @@ struct test_flash_attn_ext : public test_case { GGML_UNUSED(t); // Just counting matmul costs: // Q*K^T is nb x hsk x kv, P*V is nb x kv x hsv, per head - return 2 * nh*nr * nb * (hsk + hsv) * kv; + return (2 * nh*nr23[0] * nb * (hsk + hsv) * kv)*nr23[1]; } - test_flash_attn_ext(int64_t hsk = 128, int64_t hsv = 128, int64_t nh = 32, int64_t nr = 1, int64_t kv = 96, int64_t nb = 8, + test_flash_attn_ext(int64_t hsk = 128, int64_t hsv = 128, int64_t nh = 32, std::array nr23 = {1, 1}, int64_t kv = 96, int64_t nb = 8, bool mask = true, float max_bias = 0.0f, float logit_softcap = 0.0f, ggml_prec prec = GGML_PREC_F32, ggml_type type_KV = GGML_TYPE_F16, std::array permute = {0, 1, 2, 3}) - : hsk(hsk), hsv(hsv), nh(nh), nr(nr), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), prec(prec), type_KV(type_KV), permute(permute) {} + : hsk(hsk), hsv(hsv), nh(nh), nr23(nr23), kv(kv), nb(nb), mask(mask), max_bias(max_bias), logit_softcap(logit_softcap), prec(prec), type_KV(type_KV), permute(permute) {} ggml_tensor * build_graph(ggml_context * ctx) override { const int64_t hsk_padded = GGML_PAD(hsk, ggml_blck_size(type_KV)); @@ -3335,18 +4271,18 @@ struct test_flash_attn_ext : public test_case { return t; }; - ggml_tensor * q = create_permuted(GGML_TYPE_F32, hsk_padded, nb, nh*nr, 1); + ggml_tensor * q = create_permuted(GGML_TYPE_F32, hsk_padded, nb, nh*nr23[0], nr23[1]); ggml_set_name(q, "q"); - ggml_tensor * k = create_permuted(type_KV, hsk_padded, kv, nh, 1); + ggml_tensor * k = create_permuted(type_KV, hsk_padded, kv, nh, nr23[1]); ggml_set_name(k, "k"); - ggml_tensor * v = create_permuted(type_KV, hsv_padded, kv, nh, 1); + ggml_tensor * v = 
create_permuted(type_KV, hsv_padded, kv, nh, nr23[1]); ggml_set_name(v, "v"); ggml_tensor * m = nullptr; if (mask) { - m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kv, GGML_PAD(nb, GGML_KQ_MASK_PAD), 1, 1); + m = ggml_new_tensor_4d(ctx, GGML_TYPE_F16, kv, GGML_PAD(nb, GGML_KQ_MASK_PAD), 1, nr23[1]); ggml_set_name(m, "m"); } @@ -3637,6 +4573,7 @@ struct test_llama : public test_llm { static constexpr float attn_factor = 1.0f; static constexpr float beta_fast = 32.0f; static constexpr float beta_slow = 1.0f; + bool fused; std::string op_desc(ggml_tensor * t) override { GGML_UNUSED(t); @@ -3652,7 +4589,9 @@ struct test_llama : public test_llm { return 2e-3; } - test_llama(int n_tokens = 1) + bool run_whole_graph() override { return fused; } + + test_llama(int n_tokens = 1, bool fused = false) : test_llm({ /*n_vocab =*/ 32000, /*n_embd =*/ 3200, @@ -3664,7 +4603,9 @@ struct test_llama : public test_llm { /*f_norm_eps =*/ 0.f, /*f_norm_rms_eps =*/ 1e-5f, /*n_tokens =*/ n_tokens, - }) { + }) + , fused(fused) + { } ggml_tensor * build_graph(ggml_context * ctx) override { @@ -3931,6 +4872,21 @@ static std::vector> make_test_cases_eval() { } } + // glu ops + for (ggml_type type : {GGML_TYPE_F16, GGML_TYPE_F32}) { + for (int v : {0, 1}) { + for (int op = 0; op < GGML_GLU_OP_COUNT; op++) { + for (bool swapped : {false, true}) { + test_cases.emplace_back(new test_glu((ggml_glu_op) op, type, { 128, 2, 2, 2 }, v, swapped)); + test_cases.emplace_back(new test_glu((ggml_glu_op) op, type, { 5, 7, 11, 13 }, v, swapped)); + } + + test_cases.emplace_back(new test_glu_split((ggml_glu_op) op, type, { 128, 2, 2, 2 }, v)); + test_cases.emplace_back(new test_glu_split((ggml_glu_op) op, type, { 5, 7, 11, 13 }, v)); + } + } + } + test_cases.emplace_back(new test_get_rows(GGML_TYPE_F32, 1, 8, 2, 1, false)); for (ggml_type type : all_types) { for (int b : {1, 7}) { @@ -3955,6 +4911,23 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_get_rows_back(GGML_TYPE_I32, 
256, 5, 4, 1, v)); } + test_cases.emplace_back(new test_set_rows(GGML_TYPE_F32, { 1, 8, 1, 3 }, { 1, 1 }, 2, false)); + for (ggml_type type : all_types) { + for (int b : {1, 7}) { + for (bool v : {false, true}) { + test_cases.emplace_back(new test_set_rows(type, { 256, 5, b, 3 }, { 1, 1, }, 1, v)); + test_cases.emplace_back(new test_set_rows(type, { 256, 11, 1, b }, { 2, 3, }, 7, v)); + + test_cases.emplace_back(new test_set_rows(type, { 3*ggml_blck_size(type), 3, b, 1 }, { 2, 3, }, 2, v)); + + if (ggml_blck_size(type) == 1) { + test_cases.emplace_back(new test_set_rows(type, { 31, 3, b, 1 }, { 2, 3, }, 2, v)); + test_cases.emplace_back(new test_set_rows(type, { 33, 5, 1, b }, { 2, 3, }, 1, v)); + } + } + } + } + for (ggml_type type_input : {GGML_TYPE_F32}) { for (ggml_op_pool pool_type : {GGML_OP_POOL_AVG, GGML_OP_POOL_MAX}) { for (int k0 : {1, 3}) { @@ -4050,6 +5023,9 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_conv_transpose_1d({3,2,1,1}, {3,1,2,1}, 1, 0, 1)); test_cases.emplace_back(new test_conv_transpose_1d({2,1,1,1}, {3,1,1,1}, 1, 0, 1)); + test_cases.emplace_back(new test_conv_transpose_2d({3, 2, 3, 1}, {2, 2, 1, 3}, 1)); + test_cases.emplace_back(new test_conv_transpose_2d({10, 10, 9, 1}, {3, 3, 1, 9}, 2)); + test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 500, 1, 1})); test_cases.emplace_back(new test_count_equal(GGML_TYPE_F32, {4, 5000, 1, 1})); @@ -4177,6 +5153,7 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_add1()); test_cases.emplace_back(new test_scale()); + test_cases.emplace_back(new test_scale(GGML_TYPE_F32, {10, 10, 10, 10}, 2.0f, 1.0f)); test_cases.emplace_back(new test_silu_back()); for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f}) { @@ -4187,14 +5164,23 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_rms_norm_back(GGML_TYPE_F32, {64, 5, 4, 3}, eps)); test_cases.emplace_back(new test_l2_norm (GGML_TYPE_F32, {64, 5, 4, 3}, eps)); 
} + for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f}) { + test_cases.emplace_back(new test_rms_norm_mul(GGML_TYPE_F32, {64, 5, 4, 3}, eps)); + } test_cases.emplace_back(new test_l2_norm(GGML_TYPE_F32, {64, 5, 4, 3}, 1e-12f)); - test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 1, 1}, {4, 1536, 1, 1})); - test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {8, 1536, 1, 1}, {4, 1536, 1, 1})); - test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, 1536, 4, 1}, {4, 1536, 1, 1})); + for (int64_t d_conv : {3, 4}) { + for (int64_t d_inner: {1024, 1536, 2048}) { + test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, d_inner, 1, 1}, {d_conv, d_inner, 1, 1})); + test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {8, d_inner, 1, 1}, {d_conv, d_inner, 1, 1})); + test_cases.emplace_back(new test_ssm_conv(GGML_TYPE_F32, {4, d_inner, 4, 1}, {d_conv, d_inner, 1, 1})); + } + } - test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 16, 1024, 32, 4)); + test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 16, 1, 1024, 1, 32, 4)); // Mamba-1 + test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 128, 64, 16, 2, 32, 4)); // Mamba-2 + test_cases.emplace_back(new test_ssm_scan(GGML_TYPE_F32, 256, 64, 8, 2, 32, 4)); // Falcon-H1 test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 1, 1)); test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 32, 1)); @@ -4220,39 +5206,45 @@ static std::vector> make_test_cases_eval() { #if 1 for (ggml_type type_a : base_types) { for (ggml_type type_b : {GGML_TYPE_F32, GGML_TYPE_F16}) { - // test cases without permutation - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {2, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {1, 1}, {1, 2})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 1}, {1, 1})); - 
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 1}, {2, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {1, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {2, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {1, 2})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {3, 2}, {2, 2})); - - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {1, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {2, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {1, 1}, {1, 2})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 1}, {1, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 1}, {2, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {1, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {2, 1})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {1, 2})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {3, 2}, {2, 2})); - - // test cases with permutation - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 2, 1, 3})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 1, 3, 2})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 256, {2, 3}, {1, 1}, {0, 3, 2, 1})); - - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 2, 1, 3})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 1, 3, 2})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, 256, {2, 3}, {1, 1}, {0, 3, 2, 1})); - - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, 
{0, 2, 1, 3})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 1, 3, 2})); - test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, 256, {2, 3}, {1, 1}, {0, 3, 2, 1})); + std::vector ks = { 256 }; + if (ggml_blck_size(type_a) == 1) { + ks.push_back(4); + } + for (auto k : ks) { + // test cases without permutation + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {1, 1}, {1, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {1, 1}, {2, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {1, 1}, {1, 2})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {3, 1}, {1, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {3, 1}, {2, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {3, 2}, {1, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {3, 2}, {2, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {3, 2}, {1, 2})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {3, 2}, {2, 2})); + + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {1, 1}, {1, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {1, 1}, {2, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {1, 1}, {1, 2})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {3, 1}, {1, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {3, 1}, {2, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {3, 2}, {1, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {3, 2}, {2, 1})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {3, 2}, {1, 2})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {3, 2}, {2, 2})); + + // test cases with permutation + 
test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {2, 3}, {1, 1}, {0, 2, 1, 3})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {2, 3}, {1, 1}, {0, 1, 3, 2})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, k, {2, 3}, {1, 1}, {0, 3, 2, 1})); + + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, k, {2, 3}, {1, 1}, {0, 2, 1, 3})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, k, {2, 3}, {1, 1}, {0, 1, 3, 2})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 8, k, {2, 3}, {1, 1}, {0, 3, 2, 1})); + + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {2, 3}, {1, 1}, {0, 2, 1, 3})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {2, 3}, {1, 1}, {0, 1, 3, 2})); + test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 16, k, {2, 3}, {1, 1}, {0, 3, 2, 1})); + } // test cases with large ne00/ne10 to cover stream-k fixup test_cases.emplace_back(new test_mul_mat(type_a, type_b, 16, 1, 1024, {3, 2}, {1, 1})); @@ -4300,8 +5292,10 @@ static std::vector> make_test_cases_eval() { for (auto nr : {1,4}) { for (uint32_t m = 0; m < 2; ++m) { for (uint32_t k = 0; k < 2; ++k) { - test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 1056 + m, 1, 128 + k, {bs, 1}, {nr, 1}, {0, 2, 1, 3})); - test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 128 + m, 1, 1056 + k, {bs, 1}, {nr, 1}, {0, 1, 2, 3}, true)); + for (ggml_type type: {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_F32}) { + test_cases.emplace_back(new test_mul_mat(type, GGML_TYPE_F32, 1056 + m, 1, 128 + k, {bs, 1}, {nr, 1}, {0, 2, 1, 3})); + test_cases.emplace_back(new test_mul_mat(type, GGML_TYPE_F32, 128 + m, 1, 1056 + k, {bs, 1}, {nr, 1}, {0, 1, 2, 3}, true)); + } } } } @@ -4313,6 +5307,11 @@ static std::vector> make_test_cases_eval() { // this case is verified (pass) in Intel(R) Data Center GPU Max 1100 (sycl backend) and NV A30 (cuda backend) 
// test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 512, 262144, 9216, {1, 1}, {1, 1})); + // test large experts*tokens + for (bool b : {false, true}) { + test_cases.emplace_back(new test_mul_mat_id(GGML_TYPE_F16, GGML_TYPE_F32, 16, 16, b, 32, 1024, 16)); + } + for (ggml_type type_a : base_types) { for (ggml_type type_b : {GGML_TYPE_F32 /*, GGML_TYPE_F16 */}) { for (int n_mats : {4, 8}) { @@ -4399,26 +5398,31 @@ static std::vector> make_test_cases_eval() { for (int64_t ne1 : {16, 1024}) { if (mask) { for (ggml_type m_prec : {GGML_TYPE_F32, GGML_TYPE_F16}) { - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, m_prec, scale, max_bias)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, scale, max_bias)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, m_prec, {1, 1}, scale, max_bias)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, {1, 1}, scale, max_bias)); + + if (ne0 <= 32 && ne1 <= 32) { + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 3}, mask, m_prec, {3, 1}, scale, max_bias)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, m_prec, {2, 3}, scale, max_bias)); + } } } else { /* The precision of mask here doesn't matter as boolean mask is false */ - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, GGML_TYPE_F32, scale, max_bias)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0, ne1, 1, 1}, mask, GGML_TYPE_F32, {1, 1}, scale, max_bias)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {ne0-1, ne1-1, 1, 1}, mask, GGML_TYPE_F32, {1, 1}, scale, max_bias)); } } } } } } - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, 
GGML_TYPE_F32, 0.1f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, GGML_TYPE_F32, 0.1f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, 0.1f, 8.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, 0.1f, 8.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F32, {1, 1}, 0.1f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, true, GGML_TYPE_F16, {1, 1}, 0.1f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {16, 2, 32, 1}, false, GGML_TYPE_F32, {1, 1}, 0.1f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, {1, 1}, 0.1f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, {1, 1}, 0.1f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F32, {1, 1}, 0.1f, 8.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {32, 2, 32, 1}, true, GGML_TYPE_F16, {1, 1}, 0.1f, 8.0f)); for (float max_bias : {0.0f, 8.0f}) { for (float scale : {1.0f, 0.1f}) { @@ -4434,12 +5438,12 @@ static std::vector> make_test_cases_eval() { for (bool fw : {true, false}) { // fw == forward bool all = true; - for (float v : { 0, 1 }) { - for (float fs : { 1.0f, 1.4245f }) { - for (float ef : { 0.0f, 0.7465f }) { - for (float af : { 1.0f, 1.4245f }) { - for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) { - for (bool ff : {false, true}) { // freq_factors + for (float fs : { 
1.0f, 1.4245f }) { + for (float ef : { 0.0f, 0.7465f }) { + for (float af : { 1.0f, 1.4245f }) { + for (ggml_type type : {GGML_TYPE_F32, GGML_TYPE_F16}) { + for (bool ff : {false, true}) { // freq_factors + for (float v : { 0, 1 }) { test_cases.emplace_back(new test_rope(type, {128, 32, 2, 1}, 128, 0, 512, fs, ef, af, ff, v, fw)); // llama 7B if (all) { @@ -4452,13 +5456,21 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_rope(type, { 64, 1, 2, 1}, 64, 2, 512, fs, ef, af, ff, v, fw)); // neox (falcon 7B) test_cases.emplace_back(new test_rope(type, { 64, 71, 2, 1}, 64, 2, 512, fs, ef, af, ff, v, fw)); // neox (falcon 7B) test_cases.emplace_back(new test_rope(type, { 64, 8, 2, 1}, 64, 2, 512, fs, ef, af, ff, v, fw)); // neox (falcon 40B) + + test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 20, 0, 512, fs, ef, af, ff, v, fw)); + test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 32, 0, 512, fs, ef, af, ff, v, fw)); + test_cases.emplace_back(new test_rope(type, { 80, 32, 4, 1}, 32, 0, 512, fs, ef, af, ff, v, fw)); + test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 20, 2, 512, fs, ef, af, ff, v, fw)); // neox (stablelm) test_cases.emplace_back(new test_rope(type, { 80, 32, 2, 1}, 32, 2, 512, fs, ef, af, ff, v, fw)); // neox (phi-2) + test_cases.emplace_back(new test_rope(type, { 80, 32, 4, 1}, 32, 2, 512, fs, ef, af, ff, v, fw)); // neox (phi-2) } if (all) { test_cases.emplace_back(new test_rope(type, {128, 12, 2, 1}, 128, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl 2B) test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 128, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl 7B) + test_cases.emplace_back(new test_rope(type, {128, 12, 2, 1}, 20, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw)); + test_cases.emplace_back(new test_rope(type, {128, 28, 2, 1}, 32, GGML_ROPE_TYPE_MROPE, 512, fs, ef, af, ff, v, fw)); 
test_cases.emplace_back(new test_rope(type, { 80, 16, 2, 1}, 80, GGML_ROPE_TYPE_VISION, 512, fs, ef, af, ff, v, fw)); // rope_multi,m-rope (qwen2vl ViT) } @@ -4489,8 +5501,10 @@ static std::vector> make_test_cases_eval() { for (ggml_scale_mode mode : {GGML_SCALE_MODE_NEAREST, GGML_SCALE_MODE_BILINEAR}) { test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode)); test_cases.emplace_back(new test_upscale(GGML_TYPE_F32, {512, 512, 3, 2}, 2, mode, true)); - test_cases.emplace_back(new test_upscale_ext(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode)); + test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, mode)); + test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {5, 7, 11, 13}, {2, 5, 7, 11}, mode)); } + test_cases.emplace_back(new test_interpolate(GGML_TYPE_F32, {2, 5, 7, 11}, {5, 7, 11, 13}, GGML_SCALE_MODE_BILINEAR | GGML_SCALE_FLAG_ALIGN_CORNERS)); test_cases.emplace_back(new test_sum()); test_cases.emplace_back(new test_sum_rows()); @@ -4500,6 +5514,7 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_acc()); test_cases.emplace_back(new test_pad()); test_cases.emplace_back(new test_pad_reflect_1d()); + test_cases.emplace_back(new test_roll()); test_cases.emplace_back(new test_arange()); test_cases.emplace_back(new test_timestep_embedding()); test_cases.emplace_back(new test_leaky_relu()); @@ -4516,20 +5531,23 @@ static std::vector> make_test_cases_eval() { for (float logit_softcap : {0.0f, 10.0f}) { if (hsk != 128 && logit_softcap != 0.0f) continue; for (int nh : { 4, }) { - for (int nr : { 1, 4, 16 }) { - if (nr == 16 && hsk != 128) continue; - for (int kv : { 512, 1024, }) { - if (nr != 1 && kv != 512) continue; - for (int nb : { 1, 3, 32, 35, }) { - for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) { - if (hsk != 128 && prec == GGML_PREC_DEFAULT) continue; - for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) 
{ - test_cases.emplace_back(new test_flash_attn_ext( - hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV)); - // run fewer test cases permuted - if (mask == true && max_bias == 0.0f && logit_softcap == 0 && kv == 512) { + for (int nr3 : { 1, 3, }) { + if (hsk > 64 && nr3 > 1) continue; // skip broadcast for large head sizes + for (int nr2 : { 1, 4, 16 }) { + if (nr2 == 16 && hsk != 128) continue; + for (int kv : { 512, 1024, }) { + if (nr2 != 1 && kv != 512) continue; + for (int nb : { 1, 3, 32, 35, }) { + for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) { + if (hsk != 128 && prec == GGML_PREC_DEFAULT) continue; + for (ggml_type type_KV : {GGML_TYPE_F16, GGML_TYPE_BF16, GGML_TYPE_Q8_0, GGML_TYPE_Q4_0}) { test_cases.emplace_back(new test_flash_attn_ext( - hsk, hsv, nh, nr, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, {0, 2, 1, 3})); + hsk, hsv, nh, {nr2, nr3}, kv, nb, mask, max_bias, logit_softcap, prec, type_KV)); + // run fewer test cases permuted + if (mask == true && max_bias == 0.0f && logit_softcap == 0 && kv == 512) { + test_cases.emplace_back(new test_flash_attn_ext( + hsk, hsv, nh, {nr2, nr3}, kv, nb, mask, max_bias, logit_softcap, prec, type_KV, {0, 2, 1, 3})); + } } } } @@ -4550,8 +5568,9 @@ static std::vector> make_test_cases_eval() { test_cases.emplace_back(new test_opt_step_adamw(GGML_TYPE_F32, {10, 5, 4, 3})); - // these tests are disabled to save execution time, but they can be handy for debugging #if 0 + // these tests are disabled to save execution time, but they can be handy for debugging + test_cases.emplace_back(new test_llama(2, true)); test_cases.emplace_back(new test_llama(1)); test_cases.emplace_back(new test_llama(2)); test_cases.emplace_back(new test_falcon(1)); @@ -4572,13 +5591,14 @@ static std::vector> make_test_cases_perf() { test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {8192, 512, 2, 1}, {0, 2, 1, 3})); test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, 
{3072, 512, 2, 1}, {0, 2, 1, 3})); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 4096, 5, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {1024, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 1024, 10, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {256, 256, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {64, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f)); - test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 64, 20, 1}, false, GGML_TYPE_F32, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {4096, 4096, 5, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {12888, 256, 5, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 4096, 5, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {1024, 1024, 10, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 1024, 10, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {256, 256, 20, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {64, 64, 20, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); + test_cases.emplace_back(new test_soft_max(GGML_TYPE_F32, {77, 64, 20, 1}, false, GGML_TYPE_F32, {1, 1}, 1.0f, 0.0f)); test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {32, 10, 1, 1})); test_cases.emplace_back(new test_argmax(GGML_TYPE_F32, {1024, 10, 1, 1})); @@ -4610,7 +5630,7 @@ static std::vector> 
make_test_cases_perf() { for (int kv : { 4096, 8192, 16384, }) { for (int hs : { 64, 128, }) { for (int nr : { 1, 4, }) { - test_cases.emplace_back(new test_flash_attn_ext(hs, hs, 8, nr, kv, 1, true, 0, 0, GGML_PREC_F32, GGML_TYPE_F16)); + test_cases.emplace_back(new test_flash_attn_ext(hs, hs, 8, {nr, 1}, kv, 1, true, 0, 0, GGML_PREC_F32, GGML_TYPE_F16)); } } } @@ -4618,10 +5638,15 @@ static std::vector> make_test_cases_perf() { test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, false)); test_cases.emplace_back(new test_conv_2d_dw({512, 512, 256, 1}, {3, 3, 1, 256}, 1, 1, 1, true)); + test_cases.emplace_back(new test_conv_transpose_2d({256, 256, 256, 1}, {3, 3, 16, 256}, 1)); + + test_cases.emplace_back(new test_mean(GGML_TYPE_F32, {256, 256, 3, 1})); + return test_cases; } -static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_name, const char * params_filter) { +static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op_name, const char * params_filter, + printer * output_printer) { auto filter_test_cases = [](std::vector> & test_cases, const char * params_filter) { if (params_filter == nullptr) { return; @@ -4644,17 +5669,19 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op filter_test_cases(test_cases, params_filter); ggml_backend_t backend_cpu = ggml_backend_init_by_type(GGML_BACKEND_DEVICE_TYPE_CPU, NULL); if (backend_cpu == NULL) { - printf(" Failed to initialize CPU backend\n"); + test_operation_info info("", "", "CPU"); + info.set_error("backend", "Failed to initialize CPU backend"); + output_printer->print_operation(info); return false; } size_t n_ok = 0; for (auto & test : test_cases) { - if (test->eval(backend, backend_cpu, op_name)) { + if (test->eval(backend, backend_cpu, op_name, output_printer)) { n_ok++; } } - printf(" %zu/%zu tests passed\n", n_ok, test_cases.size()); + output_printer->print_summary(test_summary_info(n_ok, 
test_cases.size(), false)); ggml_backend_free(backend_cpu); @@ -4666,11 +5693,11 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op filter_test_cases(test_cases, params_filter); size_t n_ok = 0; for (auto & test : test_cases) { - if (test->eval_grad(backend, op_name)) { + if (test->eval_grad(backend, op_name, output_printer)) { n_ok++; } } - printf(" %zu/%zu tests passed\n", n_ok, test_cases.size()); + output_printer->print_summary(test_summary_info(n_ok, test_cases.size(), false)); return n_ok == test_cases.size(); } @@ -4679,7 +5706,16 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op auto test_cases = make_test_cases_perf(); filter_test_cases(test_cases, params_filter); for (auto & test : test_cases) { - test->eval_perf(backend, op_name); + test->eval_perf(backend, op_name, output_printer); + } + return true; + } + + if (mode == MODE_SUPPORT) { + auto test_cases = make_test_cases_eval(); + filter_test_cases(test_cases, params_filter); + for (auto & test : test_cases) { + test->eval_support(backend, op_name, output_printer); } return true; } @@ -4688,16 +5724,19 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op } static void usage(char ** argv) { - printf("Usage: %s [mode] [-o ] [-b ] [-p ]\n", argv[0]); + printf("Usage: %s [mode] [-o ] [-b ] [-p ] [--output ]\n", argv[0]); printf(" valid modes:\n"); printf(" - test (default, compare with CPU backend for correctness)\n"); printf(" - grad (compare gradients from backpropagation with method of finite differences)\n"); printf(" - perf (performance evaluation)\n"); + printf(" - support (probe backend operation support)\n"); printf(" op names for -o are as given by ggml_op_desc() (e.g. 
ADD, MUL_MAT, etc)\n"); + printf(" --output specifies output format (default: console, options: console, sql, csv)\n"); } int main(int argc, char ** argv) { test_mode mode = MODE_TEST; + output_formats output_format = CONSOLE; const char * op_name_filter = nullptr; const char * backend_filter = nullptr; const char * params_filter = nullptr; @@ -4709,6 +5748,8 @@ int main(int argc, char ** argv) { mode = MODE_PERF; } else if (strcmp(argv[i], "grad") == 0) { mode = MODE_GRAD; + } else if (strcmp(argv[i], "support") == 0) { + mode = MODE_SUPPORT; } else if (strcmp(argv[i], "-o") == 0) { if (i + 1 < argc) { op_name_filter = argv[++i]; @@ -4730,6 +5771,16 @@ int main(int argc, char ** argv) { usage(argv); return 1; } + } else if (strcmp(argv[i], "--output") == 0) { + if (i + 1 < argc) { + if (!output_format_from_str(argv[++i], output_format)) { + usage(argv); + return 1; + } + } else { + usage(argv); + return 1; + } } else { usage(argv); return 1; @@ -4739,23 +5790,29 @@ int main(int argc, char ** argv) { // load and enumerate backends ggml_backend_load_all(); - printf("Testing %zu devices\n\n", ggml_backend_dev_count()); + // Create printer for output format + std::unique_ptr output_printer = create_printer(output_format); + if (output_printer) { + output_printer->print_header(); + } + + output_printer->print_testing_start(testing_start_info(ggml_backend_dev_count())); size_t n_ok = 0; for (size_t i = 0; i < ggml_backend_dev_count(); i++) { ggml_backend_dev_t dev = ggml_backend_dev_get(i); - printf("Backend %zu/%zu: %s\n", i + 1, ggml_backend_dev_count(), ggml_backend_dev_name(dev)); - if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_dev_name(dev)) != 0) { - printf(" Skipping\n"); + output_printer->print_backend_init( + backend_init_info(i, ggml_backend_dev_count(), ggml_backend_dev_name(dev), true, "Skipping")); n_ok++; continue; } if (backend_filter == NULL && ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_CPU && mode != MODE_GRAD) { - 
printf(" Skipping CPU backend\n"); + output_printer->print_backend_init(backend_init_info( + i, ggml_backend_dev_count(), ggml_backend_dev_name(dev), true, "Skipping CPU backend")); n_ok++; continue; } @@ -4770,36 +5827,35 @@ int main(int argc, char ** argv) { ggml_backend_set_n_threads_fn(backend, std::thread::hardware_concurrency()); } - printf(" Device description: %s\n", ggml_backend_dev_description(dev)); - size_t free, total; // NOLINT + size_t free, total; // NOLINT ggml_backend_dev_memory(dev, &free, &total); - printf(" Device memory: %zu MB (%zu MB free)\n", total / 1024 / 1024, free / 1024 / 1024); - printf("\n"); + output_printer->print_backend_init(backend_init_info(i, ggml_backend_dev_count(), ggml_backend_dev_name(dev), + false, "", ggml_backend_dev_description(dev), + total / 1024 / 1024, free / 1024 / 1024, true)); - bool ok = test_backend(backend, mode, op_name_filter, params_filter); + bool ok = test_backend(backend, mode, op_name_filter, params_filter, output_printer.get()); - printf(" Backend %s: ", ggml_backend_name(backend)); if (ok) { - printf("\033[1;32mOK\033[0m\n"); n_ok++; - } else { - printf("\033[1;31mFAIL\033[0m\n"); } - - printf("\n"); + output_printer->print_backend_status( + backend_status_info(ggml_backend_name(backend), ok ? 
test_status_t::OK : test_status_t::FAIL)); ggml_backend_free(backend); } ggml_quantize_free(); - printf("%zu/%zu backends passed\n", n_ok, ggml_backend_dev_count()); + if (output_printer) { + output_printer->print_footer(); + } + + output_printer->print_overall_summary( + overall_summary_info(n_ok, ggml_backend_dev_count(), n_ok == ggml_backend_dev_count())); if (n_ok != ggml_backend_dev_count()) { - printf("\033[1;31mFAIL\033[0m\n"); return 1; } - printf("\033[1;32mOK\033[0m\n"); return 0; } diff --git a/tests/test-c.c b/tests/test-c.c index 95ba73df39a3c..a05071080a1df 100644 --- a/tests/test-c.c +++ b/tests/test-c.c @@ -1,7 +1,3 @@ #include "llama.h" -#ifdef GGML_USE_KOMPUTE -#include "ggml-kompute.h" -#endif - int main(void) {} diff --git a/tests/test-lora-conversion-inference.sh b/tests/test-lora-conversion-inference.sh index 1d1f4886caaa5..0255494b82466 100755 --- a/tests/test-lora-conversion-inference.sh +++ b/tests/test-lora-conversion-inference.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -e # Array of models to iterate over diff --git a/tests/test-thread-safety.cpp b/tests/test-thread-safety.cpp new file mode 100644 index 0000000000000..d525b7430f9d9 --- /dev/null +++ b/tests/test-thread-safety.cpp @@ -0,0 +1,152 @@ +// thread safety test +// - Loads a copy of the same model on each GPU, plus a copy on the CPU +// - Creates n_parallel (--parallel) contexts per model +// - Runs inference in parallel on each context + +#include +#include +#include +#include "llama.h" +#include "arg.h" +#include "common.h" +#include "log.h" +#include "sampling.h" + +int main(int argc, char ** argv) { + common_params params; + + if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_COMMON)) { + return 1; + } + + common_init(); + + llama_backend_init(); + llama_numa_init(params.numa); + + LOG_INF("%s\n", common_params_get_system_info(params).c_str()); + + //llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) { + // if (level == 
GGML_LOG_LEVEL_ERROR) { + // common_log_add(common_log_main(), level, "%s", text); + // } + //}, NULL); + + auto cparams = common_context_params_to_llama(params); + + int dev_count = ggml_backend_dev_count(); + int gpu_dev_count = 0; + for (int i = 0; i < dev_count; ++i) { + auto * dev = ggml_backend_dev_get(i); + if (dev && ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) { + gpu_dev_count++; + } + } + const int num_models = gpu_dev_count + 1 + 1; // GPUs + 1 CPU model + 1 layer split + //const int num_models = std::max(1, gpu_dev_count); + const int num_contexts = std::max(1, params.n_parallel); + + std::vector models; + std::vector threads; + std::atomic failed = false; + + for (int m = 0; m < num_models; ++m) { + auto mparams = common_model_params_to_llama(params); + + if (m < gpu_dev_count) { + mparams.split_mode = LLAMA_SPLIT_MODE_NONE; + mparams.main_gpu = m; + } else if (m == gpu_dev_count) { + mparams.split_mode = LLAMA_SPLIT_MODE_NONE; + mparams.main_gpu = -1; // CPU model + } else { + mparams.split_mode = LLAMA_SPLIT_MODE_LAYER;; + } + + llama_model * model = llama_model_load_from_file(params.model.path.c_str(), mparams); + if (model == NULL) { + LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.path.c_str()); + return 1; + } + + models.emplace_back(model); + } + + for (int m = 0; m < num_models; ++m) { + auto * model = models[m].get(); + for (int c = 0; c < num_contexts; ++c) { + threads.emplace_back([&, m, c, model]() { + LOG_INF("Creating context %d/%d for model %d/%d\n", c + 1, num_contexts, m + 1, num_models); + + llama_context_ptr ctx { llama_init_from_model(model, cparams) }; + if (ctx == NULL) { + LOG_ERR("failed to create context\n"); + failed.store(true); + return; + } + + std::unique_ptr sampler { common_sampler_init(model, params.sampling), common_sampler_free }; + if (sampler == NULL) { + LOG_ERR("failed to create sampler\n"); + failed.store(true); + return; + } + + llama_batch batch = {}; + { + auto prompt = 
common_tokenize(ctx.get(), params.prompt, true); + if (prompt.empty()) { + LOG_ERR("failed to tokenize prompt\n"); + failed.store(true); + return; + } + batch = llama_batch_get_one(prompt.data(), prompt.size()); + if (llama_decode(ctx.get(), batch)) { + LOG_ERR("failed to decode prompt\n"); + failed.store(true); + return; + } + } + + const auto * vocab = llama_model_get_vocab(model); + std::string result = params.prompt; + + for (int i = 0; i < params.n_predict; i++) { + llama_token token; + if (batch.n_tokens > 0) { + token = common_sampler_sample(sampler.get(), ctx.get(), batch.n_tokens - 1); + } else { + token = llama_vocab_bos(vocab); + } + + result += common_token_to_piece(ctx.get(), token); + + if (llama_vocab_is_eog(vocab, token)) { + break; + } + + batch = llama_batch_get_one(&token, 1); + if (llama_decode(ctx.get(), batch)) { + LOG_ERR("Model %d/%d, Context %d/%d: failed to decode\n", m + 1, num_models, c + 1, num_contexts); + failed.store(true); + return; + } + } + + LOG_INF("Model %d/%d, Context %d/%d: %s\n\n", m + 1, num_models, c + 1, num_contexts, result.c_str()); + }); + } + } + + for (auto & thread : threads) { + thread.join(); + } + + if (failed) { + LOG_ERR("One or more threads failed.\n"); + return 1; + } + + LOG_INF("All threads finished without errors.\n"); + return 0; +} diff --git a/tests/test-tokenizer-0.sh b/tests/test-tokenizer-0.sh index 4d2b8365547df..7ef009dc90327 100755 --- a/tests/test-tokenizer-0.sh +++ b/tests/test-tokenizer-0.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # # Usage: # diff --git a/tests/test-tokenizers-repo.sh b/tests/test-tokenizers-repo.sh index 86e839133ce62..1158aebae0f1a 100755 --- a/tests/test-tokenizers-repo.sh +++ b/tests/test-tokenizers-repo.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash if [ $# -lt 2 ]; then printf "Usage: $0 []\n" diff --git a/tools/batched-bench/batched-bench.cpp b/tools/batched-bench/batched-bench.cpp index a0a2e5ac56ea9..03628f74b2880 100644 --- 
a/tools/batched-bench/batched-bench.cpp +++ b/tools/batched-bench/batched-bench.cpp @@ -127,10 +127,9 @@ int main(int argc, char ** argv) { for (int j = 0; j < (is_pp_shared ? 1 : pl); ++j) { for (int i = 0; i < pp; ++i) { - common_batch_add(batch, 0, i, { j }, false); + common_batch_add(batch, 0, i, { j }, i == pp - 1); } } - batch.logits[batch.n_tokens - 1] = true; const auto t_pp_start = ggml_time_us(); diff --git a/tools/gguf-split/tests.sh b/tools/gguf-split/tests.sh index 05a93222711d8..c9ad85da0f1f3 100755 --- a/tools/gguf-split/tests.sh +++ b/tools/gguf-split/tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eu diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp index e59d61f195675..b80e984d0245b 100644 --- a/tools/llama-bench/llama-bench.cpp +++ b/tools/llama-bench/llama-bench.cpp @@ -267,6 +267,7 @@ struct cmd_params { int delay; bool verbose; bool progress; + bool no_warmup; output_formats output_format; output_formats output_format_stderr; }; @@ -303,6 +304,7 @@ static const cmd_params cmd_params_defaults = { /* delay */ 0, /* verbose */ false, /* progress */ false, + /* no_warmup */ false, /* output_format */ MARKDOWN, /* output_format_stderr */ NONE, }; @@ -325,6 +327,7 @@ static void print_usage(int /* argc */, char ** argv) { output_format_str(cmd_params_defaults.output_format_stderr)); printf(" -v, --verbose verbose output\n"); printf(" --progress print test progress indicators\n"); + printf(" --no-warmup skip warmup runs before benchmarking\n"); printf("\n"); printf("test parameters:\n"); printf(" -m, --model (default: %s)\n", join(cmd_params_defaults.model, ",").c_str()); @@ -425,6 +428,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { params.prio = cmd_params_defaults.prio; params.delay = cmd_params_defaults.delay; params.progress = cmd_params_defaults.progress; + params.no_warmup = cmd_params_defaults.no_warmup; for (int i = 1; i < argc; i++) { arg = argv[i]; @@ -798,6 +802,8 @@ 
static cmd_params parse_cmd_params(int argc, char ** argv) { params.verbose = true; } else if (arg == "--progress") { params.progress = true; + } else if (arg == "--no-warmup") { + params.no_warmup = true; } else { invalid_param = true; break; @@ -1925,25 +1931,27 @@ int main(int argc, char ** argv) { llama_attach_threadpool(ctx, threadpool, NULL); // warmup run - if (t.n_prompt > 0) { - if (params.progress) { - fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count); - } - //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads); - bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads); - if (!res) { - fprintf(stderr, "%s: error: failed to run prompt warmup\n", __func__); - exit(1); - } - } - if (t.n_gen > 0) { - if (params.progress) { - fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count); + if (!params.no_warmup) { + if (t.n_prompt > 0) { + if (params.progress) { + fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup prompt run\n", params_idx, params_count); + } + //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads); + bool res = test_prompt(ctx, t.n_prompt, t.n_batch, t.n_threads); + if (!res) { + fprintf(stderr, "%s: error: failed to run prompt warmup\n", __func__); + exit(1); + } } - bool res = test_gen(ctx, 1, t.n_threads); - if (!res) { - fprintf(stderr, "%s: error: failed to run gen warmup\n", __func__); - exit(1); + if (t.n_gen > 0) { + if (params.progress) { + fprintf(stderr, "llama-bench: benchmark %d/%zu: warmup generation run\n", params_idx, params_count); + } + bool res = test_gen(ctx, 1, t.n_threads); + if (!res) { + fprintf(stderr, "%s: error: failed to run gen warmup\n", __func__); + exit(1); + } } } diff --git a/tools/main/main.cpp b/tools/main/main.cpp index 19b247b0d672f..516bf09652484 100644 --- a/tools/main/main.cpp +++ b/tools/main/main.cpp @@ -292,6 +292,7 @@ int 
main(int argc, char ** argv) { if (!params.system_prompt.empty() || !params.prompt.empty()) { common_chat_templates_inputs inputs; + inputs.use_jinja = g_params->use_jinja; inputs.messages = chat_msgs; inputs.add_generation_prompt = !params.prompt.empty(); @@ -916,10 +917,19 @@ int main(int argc, char ** argv) { embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); embd_inp.insert(embd_inp.end(), line_sfx.begin(), line_sfx.end()); + if (params.verbose_prompt) { + LOG_INF("%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size() - original_size); + } + for (size_t i = original_size; i < embd_inp.size(); ++i) { const llama_token token = embd_inp[i]; + const std::string token_str = common_token_to_piece(ctx, token); output_tokens.push_back(token); - output_ss << common_token_to_piece(ctx, token); + output_ss << token_str; + + if (params.verbose_prompt) { + LOG_INF("%6d -> '%s'\n", token, token_str.c_str()); + } } // reset assistant message diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index c25bacc17769b..9146c9e9c4481 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -187,7 +187,7 @@ struct clip_hparams { float eps = 1e-6; float rope_theta = 0.0; - std::vector image_grid_pinpoints; + std::vector image_res_candidates; // for llava-uhd style models int32_t image_crop_resolution; std::unordered_set vision_feature_layer; int32_t attn_window_size = 0; @@ -1405,8 +1405,7 @@ struct clip_graph { ggml_tensor * x = embeddings; embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_2_w, embeddings); x = ggml_mul_mat(ctx0, model.mm_model_mlp_1_w,x); - embeddings = ggml_silu_inplace(ctx0, embeddings); - embeddings = ggml_mul(ctx0, embeddings,x); + embeddings = ggml_swiglu_split(ctx0, embeddings, x); embeddings = ggml_mul_mat(ctx0, model.mm_model_mlp_3_w, embeddings); } // arrangement of BOI/EOI token embeddings @@ -1502,15 +1501,8 @@ struct clip_graph { cur = ggml_mul_mat(ctx0, model.mm_1_w, cur); // swiglu - { - int64_t split_point = 
cur->ne[0] / 2; - ggml_tensor * x0 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], 0)); - ggml_tensor * x1 = ggml_cont(ctx0, ggml_view_2d(ctx0, cur, split_point, cur->ne[1], cur->nb[1], split_point * ggml_element_size(cur))); - - // see SwiGLU in ultravox_model.py, the second half passed through is silu, not the first half - x1 = ggml_silu(ctx0, x1); - cur = ggml_mul(ctx0, x0, x1); - } + // see SwiGLU in ultravox_model.py, the second half passed through is silu, not the first half + cur = ggml_swiglu_swapped(ctx0, cur); // mid-norm cur = ggml_rms_norm(ctx0, cur, 1e-6); @@ -1769,35 +1761,42 @@ struct clip_graph { cur = tmp; } + // we only support parallel ffn for now switch (type_op) { case FFN_SILU: - { + if (gate) { + cur = ggml_swiglu_split(ctx0, cur, tmp); + cb(cur, "ffn_swiglu", il); + } else { cur = ggml_silu(ctx0, cur); cb(cur, "ffn_silu", il); } break; case FFN_GELU: - { + if (gate) { + cur = ggml_geglu_split(ctx0, cur, tmp); + cb(cur, "ffn_geglu", il); + } else { cur = ggml_gelu(ctx0, cur); cb(cur, "ffn_gelu", il); } break; case FFN_GELU_ERF: - { + if (gate) { + cur = ggml_geglu_erf_split(ctx0, cur, tmp); + cb(cur, "ffn_geglu_erf", il); + } else { cur = ggml_gelu_erf(ctx0, cur); - cb(cur, "ggml_gelu_erf", il); + cb(cur, "ffn_gelu_erf", il); } break; case FFN_GELU_QUICK: - { + if (gate) { + cur = ggml_geglu_quick_split(ctx0, cur, tmp); + cb(cur, "ffn_geglu_quick", il); + } else { cur = ggml_gelu_quick(ctx0, cur); - cb(cur, "ffn_relu", il); + cb(cur, "ffn_gelu_quick", il); } break; } - // we only support parallel ffn for now - if (gate) { - cur = ggml_mul(ctx0, cur, tmp); - cb(cur, "ffn_gate_par", il); - } - if (down) { cur = ggml_mul_mat(ctx0, down, cur); } @@ -2109,8 +2108,7 @@ struct clip_model_loader { if (is_vision) { get_u32(KEY_IMAGE_SIZE, hparams.image_size); get_u32(KEY_PATCH_SIZE, hparams.patch_size); - get_u32(KEY_IMAGE_CROP_RESOLUTION, hparams.image_crop_resolution, false); - get_arr_int(KEY_IMAGE_GRID_PINPOINTS, 
hparams.image_grid_pinpoints, false); + get_u32(KEY_IMAGE_CROP_RESOLUTION, hparams.image_crop_resolution, false); get_i32(KEY_MINICPMV_VERSION, hparams.minicpmv_version, false); // legacy } else if (is_audio) { @@ -2120,6 +2118,20 @@ struct clip_model_loader { GGML_ASSERT(false && "unknown modality"); } + // for pinpoints, we need to convert it into a list of resolution candidates + { + std::vector pinpoints; + get_arr_int(KEY_IMAGE_GRID_PINPOINTS, pinpoints, false); + if (!pinpoints.empty()) { + for (size_t i = 0; i < pinpoints.size(); i += 2) { + hparams.image_res_candidates.push_back({ + pinpoints[i], + pinpoints[i+1], + }); + } + } + } + // default warmup value hparams.warmup_image_size = hparams.image_size; @@ -2198,6 +2210,9 @@ struct clip_model_loader { { hparams.rope_theta = 10000.0f; hparams.warmup_image_size = hparams.patch_size * 8; + // Mistral Small 2506 needs 1024x1024 image size cap to prevent OOM + // ref: https://github.com/ggml-org/llama.cpp/issues/14310 + hparams.image_size = 1024; get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.spatial_merge_size, false); } break; case PROJECTOR_TYPE_GEMMA3: @@ -2231,16 +2246,7 @@ struct clip_model_loader { { hparams.rope_theta = 10000.0f; get_u32(KEY_PROJ_SCALE_FACTOR, hparams.proj_scale_factor); - - // borrowed from llava-1.6 - const int isize = hparams.image_size; - hparams.image_grid_pinpoints = { - isize, isize*2, // 336, 672 - isize*2, isize, // 672, 336 - isize*2, isize*2, // 672, 672 - isize*3, isize, // 1008, 336 - isize, isize*3, // 336, 1008 - }; + set_llava_uhd_res_candidates(model, 3); } break; case PROJECTOR_TYPE_ULTRAVOX: case PROJECTOR_TYPE_QWEN2A: @@ -2674,6 +2680,21 @@ struct clip_model_loader { output[i] = values[i]; } } + + void set_llava_uhd_res_candidates(clip_model & model, const int max_patches_per_side) { + auto & hparams = model.hparams; + for (int x = 1; x <= max_patches_per_side; x++) { + for (int y = 1; y <= max_patches_per_side; y++) { + if (x == 1 && y == 1) { + continue; // skip the first 
point + } + hparams.image_res_candidates.push_back(clip_image_size{ + x*hparams.image_size, + y*hparams.image_size, + }); + } + } + } }; struct clip_init_result clip_init(const char * fname, struct clip_context_params ctx_params) { @@ -3028,36 +3049,41 @@ struct llava_uhd { bool padding_refined = false; // if true, refine image will be padded to the grid size (e.g. llava-1.6) }; - static int get_max_slices(struct clip_ctx * ctx) { - if (clip_is_minicpmv(ctx)) { - return 9; - } - return 0; - } - static slice_instructions get_slice_instructions(struct clip_ctx * ctx, const clip_image_size & original_size) { slice_instructions res; const int patch_size = clip_get_patch_size(ctx); const int slice_size = clip_get_image_size(ctx); - const int max_slice_nums = get_max_slices(ctx); const int original_width = original_size.width; const int original_height = original_size.height; - const float log_ratio = log((float)original_width / original_height); - const float ratio = (float)original_width * original_height / (slice_size * slice_size); - const int multiple = fmin(ceil(ratio), max_slice_nums); - const bool has_slices = (multiple > 1); - const bool has_pinpoints = !ctx->model.hparams.image_grid_pinpoints.empty(); + + const bool has_slices = original_size.width > slice_size || original_size.height > slice_size; + const bool has_pinpoints = !ctx->model.hparams.image_res_candidates.empty(); + + if (!has_slices) { + // skip slicing logic + res.overview_size = clip_image_size{slice_size, slice_size}; + res.refined_size = clip_image_size{0, 0}; + res.grid_size = clip_image_size{0, 0}; + + return res; + } if (has_pinpoints) { // has pinpoints, use them to calculate the grid size (e.g. 
llava-1.6) auto refine_size = llava_uhd::select_best_resolution( - ctx->model.hparams.image_grid_pinpoints, - original_size); + original_size, + ctx->model.hparams.image_res_candidates); res.overview_size = clip_image_size{slice_size, slice_size}; res.refined_size = refine_size; res.grid_size = clip_image_size{0, 0}; res.padding_refined = true; + LOG_DBG("%s: using pinpoints for slicing\n", __func__); + LOG_DBG("%s: original size: %d x %d, overview size: %d x %d, refined size: %d x %d\n", + __func__, original_width, original_height, + res.overview_size.width, res.overview_size.height, + res.refined_size.width, res.refined_size.height); + for (int y = 0; y < refine_size.height; y += slice_size) { for (int x = 0; x < refine_size.width; x += slice_size) { slice_coordinates slice; @@ -3066,13 +3092,16 @@ struct llava_uhd { slice.size.width = std::min(slice_size, refine_size.width - x); slice.size.height = std::min(slice_size, refine_size.height - y); res.slices.push_back(slice); - if (x == 0) { - res.grid_size.width++; - } + LOG_DBG("%s: slice %d: x=%d, y=%d, size=%dx%d\n", + __func__, (int)res.slices.size() - 1, + slice.x, slice.y, slice.size.width, slice.size.height); } - res.grid_size.height++; } + res.grid_size.height = refine_size.height / slice_size; + res.grid_size.width = refine_size.width / slice_size; + LOG_DBG("%s: grid size: %d x %d\n", __func__, res.grid_size.width, res.grid_size.height); + return res; } @@ -3081,17 +3110,23 @@ struct llava_uhd { auto best_size = get_best_resize(original_size, slice_size, patch_size, !has_slices); res.overview_size = best_size; - if (!has_slices) { - // skip slicing logic - res.refined_size = clip_image_size{0, 0}; - res.grid_size = clip_image_size{0, 0}; + { + const int max_slice_nums = 9; // TODO: this is only used by minicpmv, maybe remove it + const float log_ratio = log((float)original_width / original_height); + const float ratio = (float)original_width * original_height / (slice_size * slice_size); + const int 
multiple = fmin(ceil(ratio), max_slice_nums); - } else { auto best_grid = get_best_grid(max_slice_nums, multiple, log_ratio); auto refine_size = get_refine_size(original_size, best_grid, slice_size, patch_size, true); res.grid_size = best_grid; res.refined_size = refine_size; + LOG_DBG("%s: original size: %d x %d, overview size: %d x %d, refined size: %d x %d, grid size: %d x %d\n", + __func__, original_width, original_height, + res.overview_size.width, res.overview_size.height, + res.refined_size.width, res.refined_size.height, + res.grid_size.width, res.grid_size.height); + int width = refine_size.width; int height = refine_size.height; int grid_x = int(width / best_grid.width); @@ -3108,7 +3143,9 @@ struct llava_uhd { slice.size.width = grid_x; slice.size.height = grid_y; res.slices.push_back(slice); - // LOG_INF("slice %d: %d %d %d %d\n", ic, patches_i, patches_j, grid_x, grid_y); + LOG_DBG("%s: slice %d: x=%d, y=%d, size=%dx%d\n", + __func__, (int)res.slices.size() - 1, + slice.x, slice.y, slice.size.width, slice.size.height); } } } @@ -3166,48 +3203,55 @@ struct llava_uhd { return res; } + static clip_image_size resize_maintain_aspect_ratio(const clip_image_size & orig, const clip_image_size & target_max) { + float scale_width = static_cast(target_max.width) / orig.width; + float scale_height = static_cast(target_max.height) / orig.height; + float scale = std::min(scale_width, scale_height); + return clip_image_size{ + static_cast(orig.width * scale), + static_cast(orig.height * scale), + }; + } + /** * Selects the best resolution from a list of possible resolutions based on the original size. * + * For example, when given a list of resolutions: + * - 100x100 + * - 200x100 + * - 100x200 + * - 200x200 + * + * And an input image of size 111x200, then 100x200 is the best fit (least wasted resolution). 
+ * * @param original_size The original size of the image * @param possible_resolutions A list of possible resolutions * @return The best fit resolution */ static clip_image_size select_best_resolution(const clip_image_size & original_size, const std::vector & possible_resolutions) { - int original_width = original_size.width; - int original_height = original_size.height; clip_image_size best_fit; + int min_wasted_area = std::numeric_limits::max(); int max_effective_resolution = 0; - int min_wasted_resolution = std::numeric_limits::max(); - - for (const auto & resolution : possible_resolutions) { - int width = resolution.width; - int height = resolution.height; - float scale = std::min(static_cast(width) / original_width, static_cast(height) / original_height); - int downscaled_width = static_cast(original_width * scale); - int downscaled_height = static_cast(original_height * scale); - int effective_resolution = std::min(downscaled_width * downscaled_height, original_width * original_height); - int wasted_resolution = (width * height) - effective_resolution; - // LOG_INF("resolution: %d %d, scale: %f, downscaled: %d %d, effective: %d, wasted: %d\n", width, height, scale, downscaled_width, downscaled_height, effective_resolution, wasted_resolution); - if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_resolution < min_wasted_resolution)) { + + for (const clip_image_size & candidate : possible_resolutions) { + auto target_size = resize_maintain_aspect_ratio(original_size, candidate); + int effective_resolution = std::min( + target_size.width * target_size.height, + original_size.width * original_size.height); + int wasted_area = (candidate.width * candidate.height) - effective_resolution; + + if (effective_resolution > max_effective_resolution || (effective_resolution == max_effective_resolution && wasted_area < min_wasted_area)) { max_effective_resolution = effective_resolution; - min_wasted_resolution 
= wasted_resolution; - best_fit = resolution; + min_wasted_area = wasted_area; + best_fit = candidate; } + + LOG_DBG("%s: candidate: %d x %d, target: %d x %d, wasted: %d, effective: %d\n", __func__, candidate.width, candidate.height, target_size.width, target_size.height, wasted_area, effective_resolution); } return best_fit; } - // used by llava 1.6 with custom list of pinpoints - static clip_image_size select_best_resolution(const std::vector & pinpoints, const clip_image_size & original_size) { - std::vector possible_resolutions; // TODO @ngxson : construct this inside hparams, not here - for (size_t i = 0; i < pinpoints.size(); i += 2) { - possible_resolutions.push_back(clip_image_size{pinpoints[i], pinpoints[i+1]}); - } - return select_best_resolution(original_size, possible_resolutions); - } - static int ensure_divide(int length, int patch_size) { return std::max(static_cast(std::round(static_cast(length) / patch_size) * patch_size), patch_size); } @@ -3331,7 +3375,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str return true; } else if (ctx->proj_type() == PROJECTOR_TYPE_LLAMA4) { - GGML_ASSERT(!params.image_grid_pinpoints.empty()); + GGML_ASSERT(!params.image_res_candidates.empty()); auto const inst = llava_uhd::get_slice_instructions(ctx, original_size); std::vector imgs = llava_uhd::slice_image(img, inst); @@ -3371,7 +3415,7 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, str res_imgs->entries.push_back(std::move(res)); return true; - } else if (!params.image_grid_pinpoints.empty()) { + } else if (!params.image_res_candidates.empty()) { // "spatial_unpad" with "anyres" processing for llava-1.6 auto const inst = llava_uhd::get_slice_instructions(ctx, original_size); std::vector imgs = llava_uhd::slice_image(img, inst); @@ -3431,17 +3475,6 @@ const char * clip_patch_merge_type(const struct clip_ctx * ctx) { return ctx->model.hparams.mm_patch_merge_type == PATCH_MERGE_SPATIAL_UNPAD ? 
"spatial_unpad" : "flat"; } -const int32_t * clip_image_grid(const struct clip_ctx * ctx) { - if (ctx->model.hparams.image_grid_pinpoints.size()) { - return &ctx->model.hparams.image_grid_pinpoints.front(); - } - return nullptr; -} - -size_t get_clip_image_grid_size(const struct clip_ctx * ctx) { - return ctx->model.hparams.image_grid_pinpoints.size(); -} - int clip_n_output_tokens_x(const struct clip_ctx * ctx, struct clip_image_f32 * img) { const auto & params = ctx->model.hparams; const int n_total = clip_n_output_tokens(ctx, img); diff --git a/tools/mtmd/clip.h b/tools/mtmd/clip.h index cb2eb261fe2e8..08f3efb7b1daf 100644 --- a/tools/mtmd/clip.h +++ b/tools/mtmd/clip.h @@ -46,9 +46,6 @@ int32_t clip_get_hidden_size(const struct clip_ctx * ctx); // TODO: should be enum, not string const char * clip_patch_merge_type(const struct clip_ctx * ctx); -const int32_t * clip_image_grid(const struct clip_ctx * ctx); -size_t get_clip_image_grid_size(const struct clip_ctx * ctx); - int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * img); // for M-RoPE, this will be the number of token positions in X and Y directions diff --git a/tools/mtmd/mtmd.cpp b/tools/mtmd/mtmd.cpp index 8573f11437f1b..e3829738338c3 100644 --- a/tools/mtmd/mtmd.cpp +++ b/tools/mtmd/mtmd.cpp @@ -501,7 +501,10 @@ struct mtmd_tokenizer { || ctx->slice_tmpl == MTMD_SLICE_TMPL_MINICPMV_2_6 || ctx->slice_tmpl == MTMD_SLICE_TMPL_LLAMA4 ) { + const int n_col = batch_f32.grid_x; + const int n_row = batch_f32.grid_y; // split batch into chunks of single images + // NOTE: batch_f32 will be invalidated after this call auto chunks = split_batch_to_chunk(std::move(batch_f32), bitmap->id); GGML_ASSERT(chunks.size() > 0); @@ -521,8 +524,7 @@ struct mtmd_tokenizer { // add slices (or tiles) if (!chunks.empty()) { - const int n_col = batch_f32.grid_x; - const int n_row = batch_f32.grid_y; + GGML_ASSERT((int)chunks.size() == n_row * n_col); if (ctx->tok_slices_start != LLAMA_TOKEN_NULL) { 
add_text({ctx->tok_slices_start}); } diff --git a/tools/mtmd/tests.sh b/tools/mtmd/tests.sh index aa0019893283e..b25024c2f10ef 100755 --- a/tools/mtmd/tests.sh +++ b/tools/mtmd/tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # make sure we are in the right directory SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) diff --git a/tools/quantize/quantize.cpp b/tools/quantize/quantize.cpp index 3f54af7c58158..8acc765178846 100644 --- a/tools/quantize/quantize.cpp +++ b/tools/quantize/quantize.cpp @@ -107,13 +107,11 @@ static bool try_parse_ftype(const std::string & ftype_str_in, llama_ftype & ftyp return false; } -// usage: -// ./llama-quantize [--allow-requantize] [--leave-output-tensor] [--pure] models/llama/ggml-model.gguf [models/llama/ggml-model-quant.gguf] type [nthreads] -// [[noreturn]] static void usage(const char * executable) { - printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights] [--exclude-weights] [--output-tensor-type]\n", executable); - printf(" [--token-embedding-type] [--tensor-type] [--keep-split] [--override-kv] model-f32.gguf [model-quant.gguf] type [nthreads]\n\n"); + printf("usage: %s [--help] [--allow-requantize] [--leave-output-tensor] [--pure] [--imatrix] [--include-weights]\n", executable); + printf(" [--exclude-weights] [--output-tensor-type] [--token-embedding-type] [--tensor-type] [--prune-layers] [--keep-split] [--override-kv]\n"); + printf(" model-f32.gguf [model-quant.gguf] type [nthreads]\n\n"); printf(" --allow-requantize: Allows requantizing tensors that have already been quantized. Warning: This can severely reduce quality compared to quantizing from 16bit or 32bit\n"); printf(" --leave-output-tensor: Will leave output.weight un(re)quantized. 
Increases model size but may also increase quality, especially when requantizing\n"); printf(" --pure: Disable k-quant mixtures and quantize all tensors to the same type\n"); @@ -124,6 +122,8 @@ static void usage(const char * executable) { printf(" --token-embedding-type ggml_type: use this ggml_type for the token embeddings tensor\n"); printf(" --tensor-type TENSOR=TYPE: quantize this tensor to this ggml_type. example: --tensor-type attn_q=q8_0\n"); printf(" Advanced option to selectively quantize tensors. May be specified multiple times.\n"); + printf(" --prune-layers L0,L1,L2...comma-separated list of layer numbers to prune from the model\n"); + printf(" Advanced option to remove all tensors from the given layers\n"); printf(" --keep-split: will generate quantized model in the same shards as input\n"); printf(" --override-kv KEY=TYPE:VALUE\n"); printf(" Advanced option to override model metadata by key in the quantized model. May be specified multiple times.\n"); @@ -286,6 +286,32 @@ static bool parse_tensor_type(const char * data, std::vector & prune_layers) { + if (!data) { + printf("\n%s: no layer pruning ids provided\n\n", __func__); + return false; + } + + const auto block_ids = string_split(data, ','); + for (const auto & block_id : block_ids) { + int id; + try { + id = std::stoi(block_id); + } catch (...) 
{ + id = -1; + } + if (id < 0) { + printf("\n%s: invalid layer id '%s'\n\n", __func__, block_id.c_str()); + return false; + } + prune_layers.emplace_back(id); + } + + sort(prune_layers.begin(), prune_layers.end()); + prune_layers.erase(std::unique(prune_layers.begin(), prune_layers.end()), prune_layers.end()); + return true; +} + int main(int argc, char ** argv) { if (argc < 3) { usage(argv[0]); @@ -298,6 +324,7 @@ int main(int argc, char ** argv) { std::vector included_weights, excluded_weights; std::vector kv_overrides; std::vector tensor_types; + std::vector prune_layers; for (; arg_idx < argc && strncmp(argv[arg_idx], "--", 2) == 0; arg_idx++) { if (strcmp(argv[arg_idx], "--leave-output-tensor") == 0) { @@ -324,6 +351,10 @@ int main(int argc, char ** argv) { if (arg_idx == argc-1 || !parse_tensor_type(argv[++arg_idx], tensor_types)) { usage(argv[0]); } + } else if (strcmp(argv[arg_idx], "--prune-layers") == 0) { + if (arg_idx == argc-1 || !parse_layer_prune(argv[++arg_idx], prune_layers)) { + usage(argv[0]); + } } else if (strcmp(argv[arg_idx], "--override-kv") == 0) { if (arg_idx == argc-1 || !string_parse_kv_override(argv[++arg_idx], kv_overrides)) { usage(argv[0]); @@ -411,6 +442,9 @@ int main(int argc, char ** argv) { if (!tensor_types.empty()) { params.tensor_types = &tensor_types; } + if (!prune_layers.empty()) { + params.prune_layers = &prune_layers; + } llama_backend_init(); diff --git a/tools/quantize/tests.sh b/tools/quantize/tests.sh index 70f7610f9877f..ba96161484232 100644 --- a/tools/quantize/tests.sh +++ b/tools/quantize/tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash set -eu diff --git a/tools/run/CMakeLists.txt b/tools/run/CMakeLists.txt index 7cff188ca69f0..d0189596980eb 100644 --- a/tools/run/CMakeLists.txt +++ b/tools/run/CMakeLists.txt @@ -7,8 +7,7 @@ if (LLAMA_CURL) find_package(CURL REQUIRED) target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) include_directories(${CURL_INCLUDE_DIRS}) - find_library(CURL_LIBRARY curl 
REQUIRED) - set(LLAMA_RUN_EXTRA_LIBS ${LLAMA_RUN_EXTRA_LIBS} ${CURL_LIBRARY}) + set(LLAMA_RUN_EXTRA_LIBS ${LLAMA_RUN_EXTRA_LIBS} ${CURL_LIBRARIES}) endif () install(TARGETS ${TARGET} RUNTIME) diff --git a/tools/run/run.cpp b/tools/run/run.cpp index c65afd61e023c..6fe728c685358 100644 --- a/tools/run/run.cpp +++ b/tools/run/run.cpp @@ -9,6 +9,9 @@ #include #if defined(_WIN32) +# ifndef NOMINMAX +# define NOMINMAX +# endif # include # include #else @@ -939,17 +942,30 @@ static int apply_chat_template(const struct common_chat_templates * tmpls, Llama // Function to tokenize the prompt static int tokenize_prompt(const llama_vocab * vocab, const std::string & prompt, std::vector & prompt_tokens, const LlamaData & llama_data) { - const bool is_first = llama_memory_seq_pos_max(llama_get_memory(llama_data.context.get()), 0) == 0; - - const int n_prompt_tokens = -llama_tokenize(vocab, prompt.c_str(), prompt.size(), NULL, 0, is_first, true); - prompt_tokens.resize(n_prompt_tokens); - if (llama_tokenize(vocab, prompt.c_str(), prompt.size(), prompt_tokens.data(), prompt_tokens.size(), is_first, - true) < 0) { - printe("failed to tokenize the prompt\n"); + const bool is_first = llama_memory_seq_pos_max(llama_get_memory(llama_data.context.get()), 0) == -1; + int n_tokens = prompt.size() + 2 * is_first; + prompt_tokens.resize(n_tokens); + n_tokens = llama_tokenize(vocab, prompt.c_str(), prompt.size(), + prompt_tokens.data(), prompt_tokens.size(), + is_first, /*parse_special =*/true); + if (n_tokens == std::numeric_limits::min()) { + printe("tokenization failed: input too large\n"); return -1; } - - return n_prompt_tokens; + if (n_tokens < 0) { + prompt_tokens.resize(-n_tokens); + int check = llama_tokenize(vocab, prompt.c_str(), prompt.size(), + prompt_tokens.data(), prompt_tokens.size(), + is_first, /*parse_special =*/true); + if (check != -n_tokens) { + printe("failed to tokenize the prompt (size mismatch)\n"); + return -1; + } + n_tokens = check; + } else { + 
prompt_tokens.resize(n_tokens); + } + return n_tokens; } // Check if we have enough space in the context to evaluate this batch diff --git a/tools/server/README.md b/tools/server/README.md index 06533c172e530..e29511cb1b457 100644 --- a/tools/server/README.md +++ b/tools/server/README.md @@ -7,7 +7,7 @@ Set of LLM REST APIs and a simple web front end to interact with llama.cpp. **Features:** * LLM inference of F16 and quantized models on GPU and CPU * [OpenAI API](https://github.com/openai/openai-openapi) compatible chat completions and embeddings routes - * Reranking endoint (https://github.com/ggml-org/llama.cpp/pull/9510) + * Reranking endpoint (https://github.com/ggml-org/llama.cpp/pull/9510) * Parallel decoding with multi-user support * Continuous batching * Multimodal ([documentation](../../docs/multimodal.md)) / with OpenAI-compatible API support @@ -164,6 +164,7 @@ The project is under active development, and we are [looking for feedback and co | `--api-key-file FNAME` | path to file containing API keys (default: none) | | `--ssl-key-file FNAME` | path to file a PEM-encoded SSL private key
(env: LLAMA_ARG_SSL_KEY_FILE) | | `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate
(env: LLAMA_ARG_SSL_CERT_FILE) | +| `--chat-template-kwargs STRING` | JSON object containing additional params for the json template parser. Example: `--chat-template-kwargs "{\"enable_thinking\":false}"`
(env: LLAMA_CHAT_TEMPLATE_KWARGS) | | `-to, --timeout N` | server read/write timeout in seconds (default: 600)
(env: LLAMA_ARG_TIMEOUT) | | `--threads-http N` | number of threads used to process HTTP requests (default: -1)
(env: LLAMA_ARG_THREADS_HTTP) | | `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting (default: 0)
[(card)](https://ggml.ai/f0.png)
(env: LLAMA_ARG_CACHE_REUSE) | @@ -187,6 +188,8 @@ The project is under active development, and we are [looking for feedback and co | `-devd, --device-draft ` | comma-separated list of devices to use for offloading the draft model (none = don't offload)
use --list-devices to see a list of available devices | | `-ngld, --gpu-layers-draft, --n-gpu-layers-draft N` | number of layers to store in VRAM for the draft model
(env: LLAMA_ARG_N_GPU_LAYERS_DRAFT) | | `-md, --model-draft FNAME` | draft model for speculative decoding (default: unused)
(env: LLAMA_ARG_MODEL_DRAFT) | +| `-ctkd, --cache-type-k-draft TYPE` | KV cache data type for K for speculative decoding model
allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
(default: f16)
(env: LLAMA_ARG_CACHE_TYPE_K_DRAFT) | +| `-ctvd, --cache-type-v-draft TYPE` | KV cache data type for V for speculative decoding model
allowed values: f32, f16, bf16, q8_0, q4_0, q4_1, iq4_nl, q5_0, q5_1
(default: f16)
(env: LLAMA_ARG_CACHE_TYPE_V_DRAFT) | | `-mv, --model-vocoder FNAME` | vocoder model for audio generation (default: unused) | | `--tts-use-guide-tokens` | Use guide tokens to improve TTS word recall | | `--embd-bge-small-en-default` | use default bge-small-en-v1.5 model (note: can download weights from the internet) | @@ -368,6 +371,8 @@ node index.js ### GET `/health`: Returns heath check result +This endpoint is public (no API key check). + **Response format** - HTTP status code 503 @@ -706,7 +711,7 @@ If the tokens are missing, then the extra context is simply prefixed at the star ### **GET** `/props`: Get server global properties. -This endpoint is public (no API key check). By default, it is read-only. To make POST request to change global properties, you need to start server with `--props` +By default, it is read-only. To make POST request to change global properties, you need to start server with `--props` **Response format** @@ -1114,6 +1119,8 @@ See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs The `response_format` parameter supports both plain JSON output (e.g. `{"type": "json_object"}`) and schema-constrained JSON (e.g. `{"type": "json_object", "schema": {"type": "string", "minLength": 10, "maxLength": 100}}` or `{"type": "json_schema", "schema": {"properties": { "name": { "title": "Name", "type": "string" }, "date": { "title": "Date", "type": "string" }, "participants": { "items": {"type: "string" }, "title": "Participants", "type": "string" } } } }`), similar to other OpenAI-inspired API providers. +`chat_template_kwargs`: Allows sending additional parameters to the json templating system. 
For example: `{"enable_thinking": false}` + *Examples:* You can use either Python `openai` library with appropriate checkpoints: diff --git a/tools/server/chat-llama2.sh b/tools/server/chat-llama2.sh index 1fc79b7e19137..450445f17e3f1 100755 --- a/tools/server/chat-llama2.sh +++ b/tools/server/chat-llama2.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash API_URL="${API_URL:-http://127.0.0.1:8080}" diff --git a/tools/server/chat.sh b/tools/server/chat.sh index da0a6ca68ca6f..84cea2d56a0d4 100755 --- a/tools/server/chat.sh +++ b/tools/server/chat.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash API_URL="${API_URL:-http://127.0.0.1:8080}" diff --git a/tools/server/public/index.html.gz b/tools/server/public/index.html.gz index 0fb01665ae5cc..53b71079c1e2a 100644 Binary files a/tools/server/public/index.html.gz and b/tools/server/public/index.html.gz differ diff --git a/tools/server/server.cpp b/tools/server/server.cpp index b439d8b19dda3..0afe213af1e47 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -88,6 +88,26 @@ enum error_type { ERROR_TYPE_NOT_SUPPORTED, // custom error }; +static bool server_task_type_need_embd(server_task_type task_type) { + switch (task_type) { + case SERVER_TASK_TYPE_EMBEDDING: + case SERVER_TASK_TYPE_RERANK: + return true; + default: + return false; + } +} + +static bool server_task_type_need_logits(server_task_type task_type) { + switch (task_type) { + case SERVER_TASK_TYPE_COMPLETION: + case SERVER_TASK_TYPE_INFILL: + return true; + default: + return false; + } +} + struct slot_params { bool stream = true; bool cache_prompt = true; // remember the prompt to avoid reprocessing all prompt @@ -107,7 +127,6 @@ struct slot_params { std::vector response_fields; bool timings_per_token = false; bool post_sampling_probs = false; - bool ignore_eos = false; struct common_params_sampling sampling; struct common_params_speculative speculative; @@ -421,7 +440,6 @@ struct server_task { { params.sampling.logit_bias.clear(); - 
params.ignore_eos = json_value(data, "ignore_eos", false); const auto & logit_bias = data.find("logit_bias"); if (logit_bias != data.end() && logit_bias->is_array()) { @@ -452,6 +470,13 @@ struct server_task { } } } + + params.sampling.ignore_eos = json_value(data, "ignore_eos", params_base.sampling.ignore_eos); + if (params.sampling.ignore_eos) { + params.sampling.logit_bias.insert( + params.sampling.logit_bias.end(), + defaults.sampling.logit_bias_eog.begin(), defaults.sampling.logit_bias_eog.end()); + } } { @@ -1330,13 +1355,24 @@ struct server_slot { n_draft_accepted = 0; } - bool is_non_causal() const { - return task_type == SERVER_TASK_TYPE_EMBEDDING || task_type == SERVER_TASK_TYPE_RERANK; + bool need_embd() const { + return server_task_type_need_embd(task_type); + } + + bool need_logits() const { + return server_task_type_need_logits(task_type); + } + + // if the context does not have a memory module then all embeddings have to be computed within a single ubatch + // also we cannot split if the pooling would require any past tokens + bool can_split() const { + return + !need_embd() || + (llama_get_memory(ctx) && llama_pooling_type(ctx) == LLAMA_POOLING_TYPE_LAST); } bool can_batch_with(server_slot & other_slot) const { - return is_non_causal() == other_slot.is_non_causal() - && are_lora_equal(lora, other_slot.lora); + return task_type == other_slot.task_type && are_lora_equal(lora, other_slot.lora); } bool has_budget(const common_params & global_params) { @@ -1480,7 +1516,6 @@ struct server_slot { {"n_ctx", n_ctx}, {"speculative", can_speculate()}, {"is_processing", is_processing()}, - {"non_causal", is_non_causal()}, {"params", params.to_json()}, {"prompt", prompt_tokens.detokenize(ctx, true)}, {"next_token", @@ -1868,7 +1903,6 @@ struct server_context { bool clean_kv_cache = true; bool add_bos_token = true; - bool has_eos_token = false; int32_t n_ctx; // total context for all clients / slots @@ -1927,7 +1961,6 @@ struct server_context { n_ctx = 
llama_n_ctx(ctx); add_bos_token = llama_vocab_get_add_bos(vocab); - has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL; if (!params_base.speculative.model.path.empty() || !params_base.speculative.model.hf_repo.empty()) { SRV_INF("loading draft model '%s'\n", params_base.speculative.model.path.c_str()); @@ -1939,10 +1972,8 @@ struct server_context { params_dft.n_ctx = params_base.speculative.n_ctx == 0 ? params_base.n_ctx / params_base.n_parallel : params_base.speculative.n_ctx; params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers; params_dft.n_parallel = 1; - - // force F16 KV cache for the draft model for extra performance - params_dft.cache_type_k = GGML_TYPE_F16; - params_dft.cache_type_v = GGML_TYPE_F16; + params_dft.cache_type_k = params_base.speculative.cache_type_k; + params_dft.cache_type_v = params_base.speculative.cache_type_v; llama_init_dft = common_init_from_params(params_dft); @@ -2082,6 +2113,7 @@ struct server_context { /* use_jinja */ params_base.use_jinja, /* prefill_assistant */ params_base.prefill_assistant, /* reasoning_format */ params_base.reasoning_format, + /* chat_template_kwargs */ params_base.default_template_kwargs, /* common_chat_templates */ chat_templates.get(), /* allow_image */ mctx ? mtmd_support_vision(mctx) : false, /* allow_audio */ mctx ? 
mtmd_support_audio (mctx) : false, @@ -2188,10 +2220,6 @@ struct server_context { slot.params.n_predict = slot.n_predict; } - if (slot.params.ignore_eos && has_eos_token) { - slot.params.sampling.logit_bias.push_back({llama_vocab_eos(vocab), -INFINITY}); - } - { if (slot.smpl != nullptr) { common_sampler_free(slot.smpl); @@ -2552,12 +2580,14 @@ struct server_context { continue; } - const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]); - if (embd == NULL) { + const float * embd = nullptr; + if (llama_pooling_type(slot.ctx) == LLAMA_POOLING_TYPE_NONE) { embd = llama_get_embeddings_ith(ctx, i); + } else { + embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]); } - if (embd == NULL) { + if (embd == nullptr) { SLT_ERR(slot, "failed to get embeddings, token = %d, seq_id = %d\n", batch.token[i], batch.seq_id[i][0]); res->embedding.push_back(std::vector(n_embd, 0.0f)); @@ -2565,12 +2595,12 @@ struct server_context { } // normalize only when there is pooling - // TODO: configurable if (llama_pooling_type(slot.ctx) != LLAMA_POOLING_TYPE_NONE) { common_embd_normalize(embd, embd_res.data(), n_embd, 2); res->embedding.push_back(embd_res); + break; } else { - res->embedding.push_back({ embd, embd + n_embd }); + res->embedding.emplace_back(embd, embd + n_embd); } } @@ -2730,6 +2760,7 @@ struct server_context { queue_tasks.defer(std::move(task)); break; } + if (slot->is_processing()) { // if requested slot is unavailable, we defer this task for processing later SRV_DBG("requested slot is unavailable, defer task, id_task = %d\n", task.id); @@ -3092,7 +3123,14 @@ struct server_context { continue; } - if (slot.is_non_causal()) { + // TODO: support memory-less logits computation + if (slot.need_logits() && !llama_get_memory(ctx)) { + slot.release(); + send_error(slot, "the current context does not logits computation. 
skipping", ERROR_TYPE_SERVER); + continue; + } + + if (!slot.can_split()) { if (slot.n_prompt_tokens > n_ubatch) { slot.release(); send_error(slot, "input is too large to process. increase the physical batch size", ERROR_TYPE_SERVER); @@ -3227,8 +3265,7 @@ struct server_context { } if (slot.n_past == slot.n_prompt_tokens && slot.n_past > 0) { - // we have to evaluate at least 1 token to generate logits. - SLT_WRN(slot, "need to evaluate at least 1 token to generate logits, n_past = %d, n_prompt_tokens = %d\n", slot.n_past, slot.n_prompt_tokens); + SLT_WRN(slot, "need to evaluate at least 1 token for each active slot, n_past = %d, n_prompt_tokens = %d\n", slot.n_past, slot.n_prompt_tokens); slot.n_past--; } @@ -3236,8 +3273,7 @@ struct server_context { slot.n_prompt_tokens_processed = 0; } - // non-causal tasks require to fit the entire prompt in the physical batch - if (slot.is_non_causal()) { + if (!slot.can_split()) { // cannot fit the prompt in the current batch - will try next iter if (batch.n_tokens + slot.n_prompt_tokens > n_batch) { continue; @@ -3259,8 +3295,7 @@ struct server_context { slot.cache_tokens.keep_first(slot.n_past); // check if we should process the image - if (slot.n_past < slot.n_prompt_tokens - && slot.prompt_tokens[slot.n_past] == LLAMA_TOKEN_NULL) { + if (slot.n_past < slot.n_prompt_tokens && slot.prompt_tokens[slot.n_past] == LLAMA_TOKEN_NULL) { // process the image int32_t new_n_past; int32_t res = slot.prompt_tokens.process_chunk(ctx, mctx, slot.n_past, slot.id, new_n_past); @@ -3291,8 +3326,8 @@ struct server_context { break; // end of text chunk } - // without pooling, we want to output the embeddings for all the tokens in the batch - const bool need_embd = slot.task_type == SERVER_TASK_TYPE_EMBEDDING && llama_pooling_type(slot.ctx) == LLAMA_POOLING_TYPE_NONE; + // embedding requires all tokens in the batch to be output + const bool need_embd = server_task_type_need_embd(slot.task_type); common_batch_add(batch, cur_tok, slot.n_past, { 
slot.id }, need_embd); slot.cache_tokens.push_back(cur_tok); @@ -3346,41 +3381,10 @@ struct server_context { SRV_DBG("decoding batch, n_tokens = %d\n", batch.n_tokens); if (slot_batched) { - // make sure we're in the right embedding mode - llama_set_embeddings(ctx, slot_batched->is_non_causal()); // apply lora, only need to do it once per batch common_set_adapter_lora(ctx, slot_batched->lora); - } - - const bool do_encode = (params_base.embedding || params_base.reranking); - - // pad the batch so that batch.n_tokens >= n_slots - // TODO: temporary workaround for https://github.com/ggml-org/llama.cpp/issues/13689 - if (do_encode) { - const int n_slots = slots.size(); - - if (batch.n_tokens < n_slots) { - std::set seq_ids; - for (int j = 0; j < batch.n_tokens; ++j) { - seq_ids.insert(batch.seq_id[j][0]); - } - - // find unused sequence id - llama_seq_id seq_id = -1; - for (int i = 0; i < n_slots; ++i) { - if (seq_ids.find(i) == seq_ids.end()) { - seq_id = i; - } - } - - const int n_add = n_slots - batch.n_tokens; - - SRV_WRN("adding %d dummy tokens to the batch, seq_id = %d\n", n_add, seq_id); - for (int j = 0; j < n_add; ++j) { - common_batch_add(batch, 0, j, { seq_id }, false); - } - } + llama_set_embeddings(ctx, slot_batched->need_embd()); } int32_t i_next = 0; @@ -3416,9 +3420,12 @@ struct server_context { } if (ret < -1) { + // TODO: update slot state based on llama_memory_seq_pos_min() and llama_memory_seq_pos_max() err = "Compute error."; } + // TODO: handle ret == 2 (abort) when we start aborting + if (!err.empty()) { SRV_ERR("%s, i = %d, n_batch = %d, ret = %d\n", err.c_str(), i, n_batch, ret); for (auto & slot : slots) { @@ -4174,11 +4181,6 @@ int main(int argc, char ** argv) { oaicompat_type oaicompat) -> void { GGML_ASSERT(type == SERVER_TASK_TYPE_COMPLETION || type == SERVER_TASK_TYPE_INFILL); - if (ctx_server.params_base.embedding) { - res_error(res, format_error_response("This server does not support completions. 
Start it without `--embeddings`", ERROR_TYPE_NOT_SUPPORTED)); - return; - } - auto completion_id = gen_chatcmplid(); std::unordered_set task_ids; try { @@ -4433,12 +4435,8 @@ int main(int argc, char ** argv) { OAICOMPAT_TYPE_NONE); // infill is not OAI compatible }; - const auto handle_chat_completions = [&ctx_server, &res_error, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { + const auto handle_chat_completions = [&ctx_server, &handle_completions_impl](const httplib::Request & req, httplib::Response & res) { LOG_DBG("request: %s\n", req.body.c_str()); - if (ctx_server.params_base.embedding) { - res_error(res, format_error_response("This server does not support completions. Start it without `--embeddings`", ERROR_TYPE_NOT_SUPPORTED)); - return; - } auto body = json::parse(req.body); std::vector files; @@ -4566,13 +4564,18 @@ int main(int argc, char ** argv) { }; const auto handle_embeddings_impl = [&ctx_server, &res_error, &res_ok](const httplib::Request & req, httplib::Response & res, oaicompat_type oaicompat) { - const json body = json::parse(req.body); + if (!ctx_server.params_base.embedding) { + res_error(res, format_error_response("This server does not support embeddings. Start it with `--embeddings`", ERROR_TYPE_NOT_SUPPORTED)); + return; + } if (oaicompat != OAICOMPAT_TYPE_NONE && llama_pooling_type(ctx_server.ctx) == LLAMA_POOLING_TYPE_NONE) { res_error(res, format_error_response("Pooling type 'none' is not OAI compatible. 
Please use a different pooling type", ERROR_TYPE_INVALID_REQUEST)); return; } + const json body = json::parse(req.body); + // for the shape of input/content, see tokenize_input_prompts() json prompt; if (body.count("input") != 0) { @@ -4662,8 +4665,8 @@ int main(int argc, char ** argv) { }; const auto handle_rerank = [&ctx_server, &res_error, &res_ok](const httplib::Request & req, httplib::Response & res) { - if (!ctx_server.params_base.reranking || ctx_server.params_base.embedding) { - res_error(res, format_error_response("This server does not support reranking. Start it with `--reranking` and without `--embedding`", ERROR_TYPE_NOT_SUPPORTED)); + if (!ctx_server.params_base.embedding || ctx_server.params_base.pooling_type != LLAMA_POOLING_TYPE_RANK) { + res_error(res, format_error_response("This server does not support reranking. Start it with `--reranking`", ERROR_TYPE_NOT_SUPPORTED)); return; } @@ -4804,14 +4807,14 @@ int main(int argc, char ** argv) { // register static assets routes if (!params.public_path.empty()) { // Set the base directory for serving static files - bool is_found = svr->set_mount_point("/", params.public_path); + bool is_found = svr->set_mount_point(params.api_prefix + "/", params.public_path); if (!is_found) { LOG_ERR("%s: static assets path not found: %s\n", __func__, params.public_path.c_str()); return 1; } } else { // using embedded static index.html - svr->Get("/", [](const httplib::Request & req, httplib::Response & res) { + svr->Get(params.api_prefix + "/", [](const httplib::Request & req, httplib::Response & res) { if (req.get_header_value("Accept-Encoding").find("gzip") == std::string::npos) { res.set_content("Error: gzip is not supported by this browser", "text/plain"); } else { @@ -4827,37 +4830,37 @@ int main(int argc, char ** argv) { } // register API routes - svr->Get ("/health", handle_health); // public endpoint (no API key check) - svr->Get ("/metrics", handle_metrics); - svr->Get ("/props", handle_props); - 
svr->Post("/props", handle_props_change); - svr->Post("/api/show", handle_api_show); - svr->Get ("/models", handle_models); // public endpoint (no API key check) - svr->Get ("/v1/models", handle_models); // public endpoint (no API key check) - svr->Get ("/api/tags", handle_models); // ollama specific endpoint. public endpoint (no API key check) - svr->Post("/completion", handle_completions); // legacy - svr->Post("/completions", handle_completions); - svr->Post("/v1/completions", handle_completions_oai); - svr->Post("/chat/completions", handle_chat_completions); - svr->Post("/v1/chat/completions", handle_chat_completions); - svr->Post("/api/chat", handle_chat_completions); // ollama specific endpoint - svr->Post("/infill", handle_infill); - svr->Post("/embedding", handle_embeddings); // legacy - svr->Post("/embeddings", handle_embeddings); - svr->Post("/v1/embeddings", handle_embeddings_oai); - svr->Post("/rerank", handle_rerank); - svr->Post("/reranking", handle_rerank); - svr->Post("/v1/rerank", handle_rerank); - svr->Post("/v1/reranking", handle_rerank); - svr->Post("/tokenize", handle_tokenize); - svr->Post("/detokenize", handle_detokenize); - svr->Post("/apply-template", handle_apply_template); + svr->Get (params.api_prefix + "/health", handle_health); // public endpoint (no API key check) + svr->Get (params.api_prefix + "/metrics", handle_metrics); + svr->Get (params.api_prefix + "/props", handle_props); + svr->Post(params.api_prefix + "/props", handle_props_change); + svr->Post(params.api_prefix + "/api/show", handle_api_show); + svr->Get (params.api_prefix + "/models", handle_models); // public endpoint (no API key check) + svr->Get (params.api_prefix + "/v1/models", handle_models); // public endpoint (no API key check) + svr->Get (params.api_prefix + "/api/tags", handle_models); // ollama specific endpoint. 
public endpoint (no API key check) + svr->Post(params.api_prefix + "/completion", handle_completions); // legacy + svr->Post(params.api_prefix + "/completions", handle_completions); + svr->Post(params.api_prefix + "/v1/completions", handle_completions_oai); + svr->Post(params.api_prefix + "/chat/completions", handle_chat_completions); + svr->Post(params.api_prefix + "/v1/chat/completions", handle_chat_completions); + svr->Post(params.api_prefix + "/api/chat", handle_chat_completions); // ollama specific endpoint + svr->Post(params.api_prefix + "/infill", handle_infill); + svr->Post(params.api_prefix + "/embedding", handle_embeddings); // legacy + svr->Post(params.api_prefix + "/embeddings", handle_embeddings); + svr->Post(params.api_prefix + "/v1/embeddings", handle_embeddings_oai); + svr->Post(params.api_prefix + "/rerank", handle_rerank); + svr->Post(params.api_prefix + "/reranking", handle_rerank); + svr->Post(params.api_prefix + "/v1/rerank", handle_rerank); + svr->Post(params.api_prefix + "/v1/reranking", handle_rerank); + svr->Post(params.api_prefix + "/tokenize", handle_tokenize); + svr->Post(params.api_prefix + "/detokenize", handle_detokenize); + svr->Post(params.api_prefix + "/apply-template", handle_apply_template); // LoRA adapters hotswap - svr->Get ("/lora-adapters", handle_lora_adapters_list); - svr->Post("/lora-adapters", handle_lora_adapters_apply); + svr->Get (params.api_prefix + "/lora-adapters", handle_lora_adapters_list); + svr->Post(params.api_prefix + "/lora-adapters", handle_lora_adapters_apply); // Save & load slots - svr->Get ("/slots", handle_slots); - svr->Post("/slots/:id_slot", handle_slots_action); + svr->Get (params.api_prefix + "/slots", handle_slots); + svr->Post(params.api_prefix + "/slots/:id_slot", handle_slots_action); // // Start the server @@ -4878,7 +4881,9 @@ int main(int argc, char ** argv) { }; bool was_bound = false; + bool is_sock = false; if (string_ends_with(std::string(params.hostname), ".sock")) { + is_sock = true; 
LOG_INF("%s: setting address family to AF_UNIX\n", __func__); svr->set_address_family(AF_UNIX); // bind_to_port requires a second arg, any value other than 0 should @@ -4956,7 +4961,9 @@ int main(int argc, char ** argv) { SetConsoleCtrlHandler(reinterpret_cast(console_ctrl_handler), true); #endif - LOG_INF("%s: server is listening on http://%s:%d - starting the main loop\n", __func__, params.hostname.c_str(), params.port); + LOG_INF("%s: server is listening on %s - starting the main loop\n", __func__, + is_sock ? string_format("unix://%s", params.hostname.c_str()).c_str() : + string_format("http://%s:%d", params.hostname.c_str(), params.port).c_str()); // this call blocks the main thread until queue_tasks.terminate() is called ctx_server.queue_tasks.start_loop(); diff --git a/tools/server/tests/tests.sh b/tools/server/tests/tests.sh index 33fa8cc6464e2..709b5841aa49b 100755 --- a/tools/server/tests/tests.sh +++ b/tools/server/tests/tests.sh @@ -1,4 +1,4 @@ -#!/bin/bash +#!/usr/bin/env bash # make sure we are in the right directory SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) diff --git a/tools/server/tests/unit/test_chat_completion.py b/tools/server/tests/unit/test_chat_completion.py index 1b5205f79d610..7ee9a1651400d 100644 --- a/tools/server/tests/unit/test_chat_completion.py +++ b/tools/server/tests/unit/test_chat_completion.py @@ -132,6 +132,28 @@ def test_chat_template(): assert res.body["__verbose"]["prompt"] == " <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" +@pytest.mark.parametrize("prefill,re_prefill", [ + ("Whill", "Whill"), + ([{"type": "text", "text": "Wh"}, {"type": "text", "text": "ill"}], "Whill"), +]) +def test_chat_template_assistant_prefill(prefill, re_prefill): + global server + server.chat_template = "llama3" + server.debug = True # to get the "__verbose" object in the 
response + server.start() + res = server.make_request("POST", "/chat/completions", data={ + "max_tokens": 8, + "messages": [ + {"role": "system", "content": "Book"}, + {"role": "user", "content": "What is the best book"}, + {"role": "assistant", "content": prefill}, + ] + }) + assert res.status_code == 200 + assert "__verbose" in res.body + assert res.body["__verbose"]["prompt"] == f" <|start_header_id|>system<|end_header_id|>\n\nBook<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat is the best book<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n{re_prefill}" + + def test_apply_chat_template(): global server server.chat_template = "command-r" @@ -228,6 +250,7 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re [{"role": "system", "content": 123}], # [{"content": "hello"}], # TODO: should not be a valid case [{"role": "system", "content": "test"}, {}], + [{"role": "user", "content": "test"}, {"role": "assistant", "content": "test"}, {"role": "assistant", "content": "test"}], ]) def test_invalid_chat_completion_req(messages): global server diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp index f3e0392a4e9d1..f3dfc8225da4d 100644 --- a/tools/server/utils.hpp +++ b/tools/server/utils.hpp @@ -11,6 +11,8 @@ // increase max payload length to allow use of larger context size #define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576 +// increase backlog size to avoid connection resets for >> 1 slots +#define CPPHTTPLIB_LISTEN_BACKLOG 512 // disable Nagle's algorithm #define CPPHTTPLIB_TCP_NODELAY true #include @@ -271,12 +273,20 @@ static llama_tokens format_rerank(const struct llama_vocab * vocab, const llama_ } result.reserve(doc.size() + query.size() + 4); - result.push_back(llama_vocab_bos(vocab)); + if (llama_vocab_get_add_bos(vocab)) { + result.push_back(llama_vocab_bos(vocab)); + } result.insert(result.end(), query.begin(), query.end()); - result.push_back(eos_token); - 
result.push_back(llama_vocab_sep(vocab)); + if (llama_vocab_get_add_eos(vocab)) { + result.push_back(eos_token); + } + if (llama_vocab_get_add_sep(vocab)) { + result.push_back(llama_vocab_sep(vocab)); + } result.insert(result.end(), doc.begin(), doc.end()); - result.push_back(eos_token); + if (llama_vocab_get_add_eos(vocab)) { + result.push_back(eos_token); + } return result; } @@ -571,6 +581,7 @@ struct oaicompat_parser_options { bool use_jinja; bool prefill_assistant; common_reasoning_format reasoning_format; + std::map chat_template_kwargs; common_chat_templates * tmpls; bool allow_image; bool allow_audio; @@ -748,6 +759,13 @@ static json oaicompat_chat_params_parse( llama_params["parse_tool_calls"] = true; } + // merge the template args provided from command line with the args provided in the user request + auto chat_template_kwargs_object = json_value(body, "chat_template_kwargs", json::object()); + inputs.chat_template_kwargs = opt.chat_template_kwargs; + for (const auto & item : chat_template_kwargs_object.items()) { + inputs.chat_template_kwargs[item.key()] = item.value().dump(); + } + // if the assistant message appears at the end of list, we do not add end-of-turn token // for ex. 
this can be useful to modify the reasoning process in reasoning models bool prefill_assistant_message = !inputs.messages.empty() && inputs.messages.back().role == "assistant" && opt.prefill_assistant; @@ -763,6 +781,11 @@ static json oaicompat_chat_params_parse( /* TODO: test this properly */ inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE; + + if ( (!inputs.enable_thinking) || inputs.chat_template_kwargs.find("enable_thinking") != inputs.chat_template_kwargs.end()) { + throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking."); + } + inputs.add_generation_prompt = true; } @@ -771,7 +794,13 @@ static json oaicompat_chat_params_parse( /* Append assistant prefilled message */ if (prefill_assistant_message) { - chat_params.prompt += last_message.content; + if (!last_message.content_parts.empty()) { + for (auto & p : last_message.content_parts) { + chat_params.prompt += p.text; + } + } else { + chat_params.prompt += last_message.content; + } } llama_params["chat_format"] = static_cast(chat_params.format); diff --git a/tools/server/webui/src/components/Sidebar.tsx b/tools/server/webui/src/components/Sidebar.tsx index a77cb83b45dd7..b52a8df03c969 100644 --- a/tools/server/webui/src/components/Sidebar.tsx +++ b/tools/server/webui/src/components/Sidebar.tsx @@ -231,7 +231,7 @@ function ConversationItem({ > {conv.name} -
+